summaryrefslogtreecommitdiffstats
path: root/Lib/textwrap.py
diff options
context:
space:
mode:
authorBenjamin Peterson <benjamin@python.org>2008-12-21 17:01:26 (GMT)
committerBenjamin Peterson <benjamin@python.org>2008-12-21 17:01:26 (GMT)
commitc6e80eb5eeee1170b770c456adac62eccb36ac71 (patch)
tree290e23fa2728632ba647e250fcefd7ef69e1db00 /Lib/textwrap.py
parente1ae3660846787824b76834feb8fef068b6b59ee (diff)
downloadcpython-c6e80eb5eeee1170b770c456adac62eccb36ac71.zip
cpython-c6e80eb5eeee1170b770c456adac62eccb36ac71.tar.gz
cpython-c6e80eb5eeee1170b770c456adac62eccb36ac71.tar.bz2
Merged revisions 67654,67676-67677,67681,67692,67725,67746,67748,67761,67784-67785,67787-67788,67802,67832,67848-67849,67859,67862-67864,67880,67882,67885,67889-67892,67895 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ................ r67654 | georg.brandl | 2008-12-07 16:42:09 -0600 (Sun, 07 Dec 2008) | 2 lines #4457: rewrite __import__() documentation. ................ r67676 | benjamin.peterson | 2008-12-08 20:03:03 -0600 (Mon, 08 Dec 2008) | 1 line specify how things are copied ................ r67677 | benjamin.peterson | 2008-12-08 20:05:11 -0600 (Mon, 08 Dec 2008) | 1 line revert unrelated change to installer script ................ r67681 | jeremy.hylton | 2008-12-09 15:03:10 -0600 (Tue, 09 Dec 2008) | 2 lines Add simple unittests for Request ................ r67692 | amaury.forgeotdarc | 2008-12-10 18:03:42 -0600 (Wed, 10 Dec 2008) | 2 lines #1030250: correctly pass the dry_run option to the mkpath() function. ................ r67725 | benjamin.peterson | 2008-12-12 22:02:20 -0600 (Fri, 12 Dec 2008) | 1 line fix incorrect example ................ r67746 | antoine.pitrou | 2008-12-13 17:12:30 -0600 (Sat, 13 Dec 2008) | 3 lines Issue #4163: Use unicode-friendly word splitting in the textwrap functions when given an unicode string. ................ r67748 | benjamin.peterson | 2008-12-13 19:46:11 -0600 (Sat, 13 Dec 2008) | 1 line remove has_key usage ................ r67761 | benjamin.peterson | 2008-12-14 11:26:04 -0600 (Sun, 14 Dec 2008) | 1 line fix missing bracket ................ r67784 | georg.brandl | 2008-12-15 02:33:58 -0600 (Mon, 15 Dec 2008) | 2 lines #4446: document "platforms" argument for setup(). ................ r67785 | georg.brandl | 2008-12-15 02:36:11 -0600 (Mon, 15 Dec 2008) | 2 lines #4611: fix typo. ................ r67787 | georg.brandl | 2008-12-15 02:58:59 -0600 (Mon, 15 Dec 2008) | 2 lines #4578: fix has_key() usage in compiler package. ................ r67788 | georg.brandl | 2008-12-15 03:07:39 -0600 (Mon, 15 Dec 2008) | 2 lines #4568: remove limitation in varargs callback example. ................ r67802 | amaury.forgeotdarc | 2008-12-15 16:29:14 -0600 (Mon, 15 Dec 2008) | 4 lines #3632: the "pyo" macro from gdbinit can now run when the GIL is released. Patch by haypo. ................ r67832 | antoine.pitrou | 2008-12-17 16:46:54 -0600 (Wed, 17 Dec 2008) | 4 lines Issue #2467: gc.DEBUG_STATS reports invalid elapsed times. Patch by Neil Schemenauer, very slightly modified. ................ r67848 | benjamin.peterson | 2008-12-18 20:28:56 -0600 (Thu, 18 Dec 2008) | 1 line fix typo ................ r67849 | benjamin.peterson | 2008-12-18 20:31:35 -0600 (Thu, 18 Dec 2008) | 1 line _call_method -> _callmethod and _get_value to _getvalue ................ r67859 | amaury.forgeotdarc | 2008-12-19 16:56:48 -0600 (Fri, 19 Dec 2008) | 4 lines #4700: crtlicense.txt is displayed by the license() command and should be kept ascii-only. Will port to 3.0 ................ r67862 | benjamin.peterson | 2008-12-19 20:48:02 -0600 (Fri, 19 Dec 2008) | 1 line copy sentence from docstring ................ r67863 | benjamin.peterson | 2008-12-19 20:51:26 -0600 (Fri, 19 Dec 2008) | 1 line add headings ................ r67864 | benjamin.peterson | 2008-12-19 20:57:19 -0600 (Fri, 19 Dec 2008) | 1 line beef up docstring ................ r67880 | benjamin.peterson | 2008-12-20 16:49:24 -0600 (Sat, 20 Dec 2008) | 1 line remove redundant sentence ................ r67882 | benjamin.peterson | 2008-12-20 16:59:49 -0600 (Sat, 20 Dec 2008) | 1 line add some recent releases to the list ................ r67885 | benjamin.peterson | 2008-12-20 17:48:54 -0600 (Sat, 20 Dec 2008) | 1 line silence annoying DeprecationWarning ................ r67889 | benjamin.peterson | 2008-12-20 19:04:32 -0600 (Sat, 20 Dec 2008) | 1 line sphinx.web is long gone ................ r67890 | benjamin.peterson | 2008-12-20 19:12:26 -0600 (Sat, 20 Dec 2008) | 1 line update readme ................ r67891 | benjamin.peterson | 2008-12-20 19:14:47 -0600 (Sat, 20 Dec 2008) | 1 line there are way too many places which need to have the current version added ................ r67892 | benjamin.peterson | 2008-12-20 19:29:32 -0600 (Sat, 20 Dec 2008) | 9 lines Merged revisions 67809 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r67809 | benjamin.peterson | 2008-12-15 21:54:45 -0600 (Mon, 15 Dec 2008) | 1 line fix logic error ........ ................ r67895 | neal.norwitz | 2008-12-21 08:28:32 -0600 (Sun, 21 Dec 2008) | 2 lines Add Tarek for work on distutils. ................
Diffstat (limited to 'Lib/textwrap.py')
-rw-r--r--Lib/textwrap.py14
1 files changed, 8 insertions, 6 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 53f2f1b..192b43b 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -84,16 +84,16 @@ class TextWrapper:
# splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
- wordsep_re = re.compile(
+ wordsep_re = (
r'(\s+|' # any whitespace
- r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
+ r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
- wordsep_simple_re = re.compile(r'(\s+)')
+ wordsep_simple_re = r'(\s+)'
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
@@ -160,10 +160,12 @@ class TextWrapper:
'use', ' ', 'the', ' ', '-b', ' ', option!'
otherwise.
"""
- if self.break_on_hyphens is True:
- chunks = self.wordsep_re.split(text)
+ flags = re.UNICODE if isinstance(text, unicode) else 0
+ if self.break_on_hyphens:
+ pat = self.wordsep_re
else:
- chunks = self.wordsep_simple_re.split(text)
+ pat = self.wordsep_simple_re
+ chunks = re.compile(pat, flags).split(text)
chunks = filter(None, chunks) # remove empty chunks
return chunks