From 3eef44170013ca49e204a3a5fd1408adfaf49e0c Mon Sep 17 00:00:00 2001
From: Georg Brandl <georg@python.org>
Date: Sat, 27 Dec 2008 18:27:53 +0000
Subject: Follow-up to r67746 in order to restore backwards-compatibility for
 those who (monkey-)patch TextWrapper.wordsep_re with a custom RE.

---
 Lib/textwrap.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 192b43b..64a5b97 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -84,7 +84,7 @@ class TextWrapper:
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
-    wordsep_re = (
+    wordsep_re = re.compile(
         r'(\s+|'                                  # any whitespace
         r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
@@ -93,7 +93,7 @@ class TextWrapper:
     #   "Hello there -- you goof-ball, use the -b option!"
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
-    wordsep_simple_re = r'(\s+)'
+    wordsep_simple_re = re.compile(r'(\s+)')
 
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)
@@ -124,6 +124,13 @@ class TextWrapper:
         self.drop_whitespace = drop_whitespace
         self.break_on_hyphens = break_on_hyphens
 
+        # recompile the regexes for Unicode mode -- done in this clumsy way for
+        # backwards compatibility because it's rather common to monkey-patch
+        # the TextWrapper class' wordsep_re attribute.
+        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
+        self.wordsep_simple_re_uni = re.compile(
+            self.wordsep_simple_re.pattern, re.U)
+
 
     # -- Private methods -----------------------------------------------
     # (possibly useful for subclasses to override)
@@ -160,12 +167,17 @@ class TextWrapper:
           'use', ' ', 'the', ' ', '-b', ' ', option!'
         otherwise.
         """
-        flags = re.UNICODE if isinstance(text, unicode) else 0
-        if self.break_on_hyphens:
-            pat = self.wordsep_re
+        if isinstance(text, unicode):
+            if self.break_on_hyphens:
+                pat = self.wordsep_re_uni
+            else:
+                pat = self.wordsep_simple_re_uni
         else:
-            pat = self.wordsep_simple_re
-        chunks = re.compile(pat, flags).split(text)
+            if self.break_on_hyphens:
+                pat = self.wordsep_re
+            else:
+                pat = self.wordsep_simple_re
+        chunks = pat.split(text)
         chunks = filter(None, chunks)  # remove empty chunks
         return chunks
 
-- 
cgit v0.12