From 6f95ae55b151f366396e7d99b829c27f27259a89 Mon Sep 17 00:00:00 2001
From: Georg Brandl <georg@python.org>
Date: Sun, 11 May 2008 10:42:28 +0000
Subject: #2659: add ``break_on_hyphens`` to TextWrapper.

---
 Doc/library/textwrap.rst  | 18 +++++++++++++++++-
 Lib/test/test_textwrap.py |  8 ++++++++
 Lib/textwrap.py           | 23 +++++++++++++++++++++--
 Misc/NEWS                 |  2 ++
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index 7a52eb9b..a2db567 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -41,6 +41,10 @@ instance and calling a single method on it.  That instance is not reused, so for
 applications that wrap/fill many text strings, it will be more efficient for you
 to create your own :class:`TextWrapper` object.
 
+Text is preferably wrapped on whitespace and right after the hyphens in
+hyphenated words; only then will long words be broken if necessary, unless
+:attr:`TextWrapper.break_long_words` is set to false.
+
 An additional utility function, :func:`dedent`, is provided to remove
 indentation from strings that have unwanted whitespace to the left of the text.
 
@@ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text.
       than :attr:`width`.  (Long words will be put on a line by themselves, in
       order to minimize the amount by which :attr:`width` is exceeded.)
 
+
+   .. attribute:: break_on_hyphens
+
+      (default: ``True``) If true, wrapping will occur preferably on whitespace
+      and right after hyphens in compound words, as is customary in English.
+      If false, only whitespace will be considered as a potentially good place
+      for line breaks, but you need to set :attr:`break_long_words` to false if
+      you want truly unbreakable words.  Default behaviour in previous versions
+      was to always allow breaking hyphenated words.
+
+      .. versionadded:: 2.6
+
+
    :class:`TextWrapper` also provides two public methods, analogous to the
    module-level convenience functions:
 
-
    .. method:: wrap(text)
 
       Wraps the single paragraph in *text* (a string) so every line is at most
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index c1c09f6..1eab90c 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -364,6 +364,14 @@ What a mess!
              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 
+    def test_break_on_hyphens(self):
+        # Ensure that the break_on_hyphens attribute works
+        text = "yaba daba-doo"
+        self.check_wrap(text, 10, ["yaba daba-", "doo"],
+                        break_on_hyphens=True)
+        self.check_wrap(text, 10, ["yaba", "daba-doo"],
+                        break_on_hyphens=False)
+
     def test_bad_width(self):
         # Ensure that width <= 0 is caught.
         text = "Whatever, it doesn't matter."
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index ffbb9d1..7cd0597 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -63,6 +63,10 @@ class TextWrapper:
       break_long_words (default: true)
         Break words longer than 'width'.  If false, those words will not
         be broken, and some lines might be longer than 'width'.
+      break_on_hyphens (default: true)
+        Allow breaking hyphenated words. If true, wrapping will occur
+        preferably on whitespace and right after hyphens that are part
+        of compound words.
       drop_whitespace (default: true)
         Drop leading and trailing whitespace from lines.
     """
@@ -85,6 +89,12 @@ class TextWrapper:
         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
+    # This less funky little regex just splits on recognized spaces. E.g.
+    #   "Hello there -- you goof-ball, use the -b option!"
+    # splits into
+    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
+    wordsep_simple_re = re.compile(r'(\s+)')
+
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)
     sentence_end_re = re.compile(r'[%s]'              # lowercase letter
@@ -102,7 +112,8 @@ class TextWrapper:
                  replace_whitespace=True,
                  fix_sentence_endings=False,
                  break_long_words=True,
-                 drop_whitespace=True):
+                 drop_whitespace=True,
+                 break_on_hyphens=True):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -111,6 +122,7 @@ class TextWrapper:
         self.fix_sentence_endings = fix_sentence_endings
         self.break_long_words = break_long_words
         self.drop_whitespace = drop_whitespace
+        self.break_on_hyphens = break_on_hyphens
 
 
     # -- Private methods -----------------------------------------------
@@ -143,8 +155,15 @@ class TextWrapper:
         breaks into the following chunks:
           'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
           'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+        if break_on_hyphens is True, or in:
+          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
+          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+        otherwise.
         """
-        chunks = self.wordsep_re.split(text)
+        if self.break_on_hyphens is True:
+            chunks = self.wordsep_re.split(text)
+        else:
+            chunks = self.wordsep_simple_re.split(text)
         chunks = filter(None, chunks)  # remove empty chunks
         return chunks
 
diff --git a/Misc/NEWS b/Misc/NEWS
index d796735..9007462 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,8 @@ Extension Modules
 Library
 -------
 
+- #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
+
 - The mhlib module has been deprecated for removal in Python 3.0.
 
 - The linuxaudiodev module has been deprecated for removal in Python 3.0.
-- 
cgit v0.12