diff options
author | Greg Ward <gward@python.net> | 2006-06-11 00:40:49 (GMT) |
---|---|---|
committer | Greg Ward <gward@python.net> | 2006-06-11 00:40:49 (GMT) |
commit | 7f54740c4ddacf99eba5bb18abe904a6a4960165 (patch) | |
tree | 3bb108337bc5a5ac0b51f3cb4c5b59391b0d88e1 /Lib/textwrap.py | |
parent | 0e1159583c06fdf85d7d2dbe8b82e42565b9d166 (diff) | |
download | cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.zip cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.tar.gz cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.tar.bz2 |
Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately,
i.e. do *not* expand tabs, but treat them as whitespace that is not
equivalent to spaces. Add a couple of test cases. Clarify docs.
Diffstat (limited to 'Lib/textwrap.py')
-rw-r--r-- | Lib/textwrap.py | 75 |
1 file changed, 46 insertions, 29 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 7c68280..e18000a 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -317,41 +317,58 @@ def fill(text, width=70, **kwargs): # -- Loosely related functionality ------------------------------------- -def dedent(text): - """dedent(text : string) -> string - - Remove any whitespace than can be uniformly removed from the left - of every line in `text`. +_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) +_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) - This can be used e.g. to make triple-quoted strings line up with - the left edge of screen/whatever, while still presenting it in the - source code in indented form. +def dedent(text): + """Remove any common leading whitespace from every line in `text`. - For example: + This can be used to make triple-quoted strings line up with the left + edge of the display, while still presenting them in the source code + in indented form. - def test(): - # end first line with \ to avoid the empty line! - s = '''\ - hello - world - ''' - print repr(s) # prints ' hello\n world\n ' - print repr(dedent(s)) # prints 'hello\n world\n' + Note that tabs and spaces are both treated as whitespace, but they + are not equal: the lines " hello" and "\thello" are + considered to have no common leading whitespace. (This behaviour is + new in Python 2.5; older versions of this module incorrectly + expanded tabs before searching for common leading whitespace.) """ - lines = text.expandtabs().split('\n') + # Look for the longest leading string of spaces and tabs common to + # all lines. 
margin = None - for line in lines: - content = line.lstrip() - if not content: - continue - indent = len(line) - len(content) + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: if margin is None: margin = indent - else: - margin = min(margin, indent) - if margin is not None and margin > 0: - for i in range(len(lines)): - lines[i] = lines[i][margin:] + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent - return '\n'.join(lines) + # Current line and previous winner have no common whitespace: + # there is no margin. + else: + margin = "" + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + +if __name__ == "__main__": + #print dedent("\tfoo\n\tbar") + #print dedent(" \thello there\n \t how are you?") + print dedent("Hello there.\n This is indented.") |