diff options
author | Georg Brandl <georg@python.org> | 2008-12-27 18:27:53 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-12-27 18:27:53 (GMT) |
commit | 3eef44170013ca49e204a3a5fd1408adfaf49e0c (patch) | |
tree | 566b012ac539ad1351c0045c51f2b07719214eb0 /Lib/textwrap.py | |
parent | 8d5934b25d7c3e636444428dc715316a08b8c94e (diff) | |
download | cpython-3eef44170013ca49e204a3a5fd1408adfaf49e0c.zip cpython-3eef44170013ca49e204a3a5fd1408adfaf49e0c.tar.gz cpython-3eef44170013ca49e204a3a5fd1408adfaf49e0c.tar.bz2 |
Follow-up to r67746 in order to restore backwards-compatibility for
those who (monkey-)patch TextWrapper.wordsep_re with a custom RE.
Diffstat (limited to 'Lib/textwrap.py')
-rw-r--r-- | Lib/textwrap.py | 26 |
1 file changed, 19 insertions, 7 deletions
```diff
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 192b43b..64a5b97 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -84,7 +84,7 @@ class TextWrapper:
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
-    wordsep_re = (
+    wordsep_re = re.compile(
         r'(\s+|'                                  # any whitespace
         r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
@@ -93,7 +93,7 @@ class TextWrapper:
     # "Hello there -- you goof-ball, use the -b option!"
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
-    wordsep_simple_re = r'(\s+)'
+    wordsep_simple_re = re.compile(r'(\s+)')
 
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)
@@ -124,6 +124,13 @@ class TextWrapper:
         self.drop_whitespace = drop_whitespace
         self.break_on_hyphens = break_on_hyphens
 
+        # recompile the regexes for Unicode mode -- done in this clumsy way for
+        # backwards compatibility because it's rather common to monkey-patch
+        # the TextWrapper class' wordsep_re attribute.
+        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
+        self.wordsep_simple_re_uni = re.compile(
+            self.wordsep_simple_re.pattern, re.U)
+
 
     # -- Private methods -----------------------------------------------
     # (possibly useful for subclasses to override)
@@ -160,12 +167,17 @@ class TextWrapper:
           'use', ' ', 'the', ' ', '-b', ' ', option!'
         otherwise.
         """
-        flags = re.UNICODE if isinstance(text, unicode) else 0
-        if self.break_on_hyphens:
-            pat = self.wordsep_re
+        if isinstance(text, unicode):
+            if self.break_on_hyphens:
+                pat = self.wordsep_re_uni
+            else:
+                pat = self.wordsep_simple_re_uni
         else:
-            pat = self.wordsep_simple_re
-        chunks = re.compile(pat, flags).split(text)
+            if self.break_on_hyphens:
+                pat = self.wordsep_re
+            else:
+                pat = self.wordsep_simple_re
+        chunks = pat.split(text)
         chunks = filter(None, chunks)  # remove empty chunks
         return chunks
 
```