summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/lib/libtextwrap.tex 15
-rw-r--r-- Lib/test/test_textwrap.py 61
-rw-r--r-- Lib/textwrap.py 75
-rw-r--r-- Misc/NEWS 3
4 files changed, 109 insertions, 45 deletions
diff --git a/Doc/lib/libtextwrap.tex b/Doc/lib/libtextwrap.tex
index 9fb0816..38f9b03 100644
--- a/Doc/lib/libtextwrap.tex
+++ b/Doc/lib/libtextwrap.tex
@@ -47,12 +47,17 @@ remove indentation from strings that have unwanted whitespace to the
left of the text.
\begin{funcdesc}{dedent}{text}
-Remove any whitespace that can be uniformly removed from the left
-of every line in \var{text}.
+Remove any common leading whitespace from every line in \var{text}.
-This is typically used to make triple-quoted strings line up with
-the left edge of screen/whatever, while still presenting it in the
-source code in indented form.
+This can be used to make triple-quoted strings line up with the left
+edge of the display, while still presenting them in the source code
+in indented form.
+
+Note that tabs and spaces are both treated as whitespace, but they are
+not equal: the lines \code{" {} hello"} and \code{"\textbackslash{}thello"}
+are considered to have no common leading whitespace. (This behaviour is
+new in Python 2.5; older versions of this module incorrectly expanded
+tabs before searching for common leading whitespace.)
For example:
\begin{verbatim}
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 68e4d6d..98cc869 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -460,38 +460,42 @@ some (including a hanging indent).'''
# of IndentTestCase!
class DedentTestCase(unittest.TestCase):
+ def assertUnchanged(self, text):
+ """assert that dedent() has no effect on 'text'"""
+ self.assertEquals(text, dedent(text))
+
def test_dedent_nomargin(self):
# No lines indented.
text = "Hello there.\nHow are you?\nOh good, I'm glad."
- self.assertEquals(dedent(text), text)
+ self.assertUnchanged(text)
# Similar, with a blank line.
text = "Hello there.\n\nBoo!"
- self.assertEquals(dedent(text), text)
+ self.assertUnchanged(text)
# Some lines indented, but overall margin is still zero.
text = "Hello there.\n This is indented."
- self.assertEquals(dedent(text), text)
+ self.assertUnchanged(text)
# Again, add a blank line.
text = "Hello there.\n\n Boo!\n"
- self.assertEquals(dedent(text), text)
+ self.assertUnchanged(text)
def test_dedent_even(self):
# All lines indented by two spaces.
text = " Hello there.\n How are ya?\n Oh good."
expect = "Hello there.\nHow are ya?\nOh good."
- self.assertEquals(dedent(text), expect)
+ self.assertEquals(expect, dedent(text))
# Same, with blank lines.
text = " Hello there.\n\n How are ya?\n Oh good.\n"
expect = "Hello there.\n\nHow are ya?\nOh good.\n"
- self.assertEquals(dedent(text), expect)
+ self.assertEquals(expect, dedent(text))
# Now indent one of the blank lines.
text = " Hello there.\n \n How are ya?\n Oh good.\n"
expect = "Hello there.\n\nHow are ya?\nOh good.\n"
- self.assertEquals(dedent(text), expect)
+ self.assertEquals(expect, dedent(text))
def test_dedent_uneven(self):
# Lines indented unevenly.
@@ -505,18 +509,53 @@ def foo():
while 1:
return foo
'''
- self.assertEquals(dedent(text), expect)
+ self.assertEquals(expect, dedent(text))
# Uneven indentation with a blank line.
text = " Foo\n Bar\n\n Baz\n"
expect = "Foo\n Bar\n\n Baz\n"
- self.assertEquals(dedent(text), expect)
+ self.assertEquals(expect, dedent(text))
# Uneven indentation with a whitespace-only line.
text = " Foo\n Bar\n \n Baz\n"
expect = "Foo\n Bar\n\n Baz\n"
- self.assertEquals(dedent(text), expect)
-
+ self.assertEquals(expect, dedent(text))
+
+ # dedent() should not mangle internal tabs
+ def test_dedent_preserve_internal_tabs(self):
+ text = " hello\tthere\n how are\tyou?"
+ expect = "hello\tthere\nhow are\tyou?"
+ self.assertEquals(expect, dedent(text))
+
+ # make sure that it preserves tabs when it's not making any
+ # changes at all
+ self.assertEquals(expect, dedent(expect))
+
+ # dedent() should not mangle tabs in the margin (i.e.
+ # tabs and spaces both count as margin, but are *not*
+ # considered equivalent)
+ def test_dedent_preserve_margin_tabs(self):
+ text = " hello there\n\thow are you?"
+ self.assertUnchanged(text)
+
+ # same effect even if we have 8 spaces
+ text = "        hello there\n\thow are you?"
+ self.assertUnchanged(text)
+
+ # dedent() only removes whitespace that can be uniformly removed!
+ text = "\thello there\n\thow are you?"
+ expect = "hello there\nhow are you?"
+ self.assertEquals(expect, dedent(text))
+
+ text = " \thello there\n \thow are you?"
+ self.assertEquals(expect, dedent(text))
+
+ text = " \t hello there\n \t how are you?"
+ self.assertEquals(expect, dedent(text))
+
+ text = " \thello there\n \t how are you?"
+ expect = "hello there\n how are you?"
+ self.assertEquals(expect, dedent(text))
def test_main():
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 7c68280..e18000a 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -317,41 +317,58 @@ def fill(text, width=70, **kwargs):
# -- Loosely related functionality -------------------------------------
-def dedent(text):
- """dedent(text : string) -> string
-
- Remove any whitespace than can be uniformly removed from the left
- of every line in `text`.
+_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
+_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
- This can be used e.g. to make triple-quoted strings line up with
- the left edge of screen/whatever, while still presenting it in the
- source code in indented form.
+def dedent(text):
+ """Remove any common leading whitespace from every line in `text`.
- For example:
+ This can be used to make triple-quoted strings line up with the left
+ edge of the display, while still presenting them in the source code
+ in indented form.
- def test():
- # end first line with \ to avoid the empty line!
- s = '''\
- hello
- world
- '''
- print repr(s) # prints ' hello\n world\n '
- print repr(dedent(s)) # prints 'hello\n world\n'
+ Note that tabs and spaces are both treated as whitespace, but they
+ are not equal: the lines " hello" and "\thello" are
+ considered to have no common leading whitespace. (This behaviour is
+ new in Python 2.5; older versions of this module incorrectly
+ expanded tabs before searching for common leading whitespace.)
"""
- lines = text.expandtabs().split('\n')
+ # Look for the longest leading string of spaces and tabs common to
+ # all lines.
margin = None
- for line in lines:
- content = line.lstrip()
- if not content:
- continue
- indent = len(line) - len(content)
+ text = _whitespace_only_re.sub('', text)
+ indents = _leading_whitespace_re.findall(text)
+ for indent in indents:
if margin is None:
margin = indent
- else:
- margin = min(margin, indent)
- if margin is not None and margin > 0:
- for i in range(len(lines)):
- lines[i] = lines[i][margin:]
+ # Current line more deeply indented than previous winner:
+ # no change (previous winner is still on top).
+ elif indent.startswith(margin):
+ pass
+
+ # Current line consistent with and no deeper than previous winner:
+ # it's the new winner.
+ elif margin.startswith(indent):
+ margin = indent
- return '\n'.join(lines)
+ # Current line and previous winner have no common whitespace:
+ # there is no margin.
+ else:
+ margin = ""
+ break
+
+ # sanity check (testing/debugging only)
+ if 0 and margin:
+ for line in text.split("\n"):
+ assert not line or line.startswith(margin), \
+ "line = %r, margin = %r" % (line, margin)
+
+ if margin:
+ text = re.sub(r'(?m)^' + margin, '', text)
+ return text
+
+if __name__ == "__main__":
+ #print dedent("\tfoo\n\tbar")
+ #print dedent(" \thello there\n \t how are you?")
+ print dedent("Hello there.\n This is indented.")
diff --git a/Misc/NEWS b/Misc/NEWS
index 3ec43af..80e6d95 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -145,6 +145,9 @@ Extension Modules
Library
-------
+- Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately;
+ clarify docs.
+
- The wsgiref package has been added to the standard library.
- The functions update_wrapper() and wraps() have been added to the functools