diff options
-rw-r--r-- | Doc/lib/libtextwrap.tex | 15 | ||||
-rw-r--r-- | Lib/test/test_textwrap.py | 61 | ||||
-rw-r--r-- | Lib/textwrap.py | 75 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 109 insertions, 45 deletions
diff --git a/Doc/lib/libtextwrap.tex b/Doc/lib/libtextwrap.tex index 9fb0816..38f9b03 100644 --- a/Doc/lib/libtextwrap.tex +++ b/Doc/lib/libtextwrap.tex @@ -47,12 +47,17 @@ remove indentation from strings that have unwanted whitespace to the left of the text. \begin{funcdesc}{dedent}{text} -Remove any whitespace that can be uniformly removed from the left -of every line in \var{text}. +Remove any common leading whitespace from every line in \var{text}. -This is typically used to make triple-quoted strings line up with -the left edge of screen/whatever, while still presenting it in the -source code in indented form. +This can be used to make triple-quoted strings line up with the left +edge of the display, while still presenting them in the source code +in indented form. + +Note that tabs and spaces are both treated as whitespace, but they are +not equal: the lines \code{" {} hello"} and \code{"\textbackslash{}thello"} +are considered to have no common leading whitespace. (This behaviour is +new in Python 2.5; older versions of this module incorrectly expanded +tabs before searching for common leading whitespace.) For example: \begin{verbatim} diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index 68e4d6d..98cc869 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -460,38 +460,42 @@ some (including a hanging indent).''' # of IndentTestCase! class DedentTestCase(unittest.TestCase): + def assertUnchanged(self, text): + """assert that dedent() has no effect on 'text'""" + self.assertEquals(text, dedent(text)) + def test_dedent_nomargin(self): # No lines indented. text = "Hello there.\nHow are you?\nOh good, I'm glad." - self.assertEquals(dedent(text), text) + self.assertUnchanged(text) # Similar, with a blank line. text = "Hello there.\n\nBoo!" - self.assertEquals(dedent(text), text) + self.assertUnchanged(text) # Some lines indented, but overall margin is still zero. text = "Hello there.\n This is indented." - self.assertEquals(dedent(text), text) + self.assertUnchanged(text) # Again, add a blank line. text = "Hello there.\n\n Boo!\n" - self.assertEquals(dedent(text), text) + self.assertUnchanged(text) def test_dedent_even(self): # All lines indented by two spaces. text = " Hello there.\n How are ya?\n Oh good." expect = "Hello there.\nHow are ya?\nOh good." - self.assertEquals(dedent(text), expect) + self.assertEquals(expect, dedent(text)) # Same, with blank lines. text = " Hello there.\n\n How are ya?\n Oh good.\n" expect = "Hello there.\n\nHow are ya?\nOh good.\n" - self.assertEquals(dedent(text), expect) + self.assertEquals(expect, dedent(text)) # Now indent one of the blank lines. text = " Hello there.\n \n How are ya?\n Oh good.\n" expect = "Hello there.\n\nHow are ya?\nOh good.\n" - self.assertEquals(dedent(text), expect) + self.assertEquals(expect, dedent(text)) def test_dedent_uneven(self): # Lines indented unevenly. @@ -505,18 +509,53 @@ def foo(): while 1: return foo ''' - self.assertEquals(dedent(text), expect) + self.assertEquals(expect, dedent(text)) # Uneven indentation with a blank line. text = " Foo\n Bar\n\n Baz\n" expect = "Foo\n Bar\n\n Baz\n" - self.assertEquals(dedent(text), expect) + self.assertEquals(expect, dedent(text)) # Uneven indentation with a whitespace-only line. text = " Foo\n Bar\n \n Baz\n" expect = "Foo\n Bar\n\n Baz\n" - self.assertEquals(dedent(text), expect) - + self.assertEquals(expect, dedent(text)) + + # dedent() should not mangle internal tabs + def test_dedent_preserve_internal_tabs(self): + text = " hello\tthere\n how are\tyou?" + expect = "hello\tthere\nhow are\tyou?" + self.assertEquals(expect, dedent(text)) + + # make sure that it preserves tabs when it's not making any + # changes at all + self.assertEquals(expect, dedent(expect)) + + # dedent() should not mangle tabs in the margin (i.e. + # tabs and spaces both count as margin, but are *not* + # considered equivalent) + def test_dedent_preserve_margin_tabs(self): + text = " hello there\n\thow are you?" + self.assertUnchanged(text) + + # same effect even if we have 8 spaces + text = " hello there\n\thow are you?" + self.assertUnchanged(text) + + # dedent() only removes whitespace that can be uniformly removed! + text = "\thello there\n\thow are you?" + expect = "hello there\nhow are you?" + self.assertEquals(expect, dedent(text)) + + text = " \thello there\n \thow are you?" + self.assertEquals(expect, dedent(text)) + + text = " \t hello there\n \t how are you?" + self.assertEquals(expect, dedent(text)) + + text = " \thello there\n \t how are you?" + expect = "hello there\n how are you?" + self.assertEquals(expect, dedent(text)) def test_main(): diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 7c68280..e18000a 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -317,41 +317,58 @@ def fill(text, width=70, **kwargs): # -- Loosely related functionality ------------------------------------- -def dedent(text): - """dedent(text : string) -> string - - Remove any whitespace than can be uniformly removed from the left - of every line in `text`. +_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) +_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) - This can be used e.g. to make triple-quoted strings line up with - the left edge of screen/whatever, while still presenting it in the - source code in indented form. +def dedent(text): + """Remove any common leading whitespace from every line in `text`. - For example: + This can be used to make triple-quoted strings line up with the left + edge of the display, while still presenting them in the source code + in indented form. - def test(): - # end first line with \ to avoid the empty line! - s = '''\ - hello - world - ''' - print repr(s) # prints ' hello\n world\n ' - print repr(dedent(s)) # prints 'hello\n world\n' + Note that tabs and spaces are both treated as whitespace, but they + are not equal: the lines " hello" and "\thello" are + considered to have no common leading whitespace. (This behaviour is + new in Python 2.5; older versions of this module incorrectly + expanded tabs before searching for common leading whitespace.) """ - lines = text.expandtabs().split('\n') + # Look for the longest leading string of spaces and tabs common to + # all lines. margin = None - for line in lines: - content = line.lstrip() - if not content: - continue - indent = len(line) - len(content) + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: if margin is None: margin = indent - else: - margin = min(margin, indent) - if margin is not None and margin > 0: - for i in range(len(lines)): - lines[i] = lines[i][margin:] + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent - return '\n'.join(lines) + # Current line and previous winner have no common whitespace: + # there is no margin. + else: + margin = "" + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + +if __name__ == "__main__": + #print dedent("\tfoo\n\tbar") + #print dedent(" \thello there\n \t how are you?") + print dedent("Hello there.\n This is indented.") @@ -145,6 +145,9 @@ Extension Modules Library ------- +- Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately; + clarify docs. + - The wsgiref package has been added to the standard library. - The functions update_wrapper() and wraps() have been added to the functools |