summary | refs | log | tree | commit | diff | stats
path: root/Lib/textwrap.py
diff options
context:
space:
mode:
author: Greg Ward <gward@python.net> 2006-06-11 00:40:49 (GMT)
committer: Greg Ward <gward@python.net> 2006-06-11 00:40:49 (GMT)
commit: 7f54740c4ddacf99eba5bb18abe904a6a4960165 (patch)
tree: 3bb108337bc5a5ac0b51f3cb4c5b59391b0d88e1 /Lib/textwrap.py
parent: 0e1159583c06fdf85d7d2dbe8b82e42565b9d166 (diff)
download: cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.zip
cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.tar.gz
cpython-7f54740c4ddacf99eba5bb18abe904a6a4960165.tar.bz2
Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately,
i.e. do *not* expand tabs, but treat them as whitespace that is not equivalent to spaces. Add a couple of test cases. Clarify docs.
Diffstat (limited to 'Lib/textwrap.py')
-rw-r--r-- Lib/textwrap.py 75
1 files changed, 46 insertions, 29 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 7c68280..e18000a 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -317,41 +317,58 @@ def fill(text, width=70, **kwargs):
# -- Loosely related functionality -------------------------------------
-def dedent(text):
- """dedent(text : string) -> string
-
- Remove any whitespace than can be uniformly removed from the left
- of every line in `text`.
+_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
+_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
- This can be used e.g. to make triple-quoted strings line up with
- the left edge of screen/whatever, while still presenting it in the
- source code in indented form.
+def dedent(text):
+ """Remove any common leading whitespace from every line in `text`.
- For example:
+ This can be used to make triple-quoted strings line up with the left
+ edge of the display, while still presenting them in the source code
+ in indented form.
- def test():
- # end first line with \ to avoid the empty line!
- s = '''\
- hello
- world
- '''
- print repr(s) # prints ' hello\n world\n '
- print repr(dedent(s)) # prints 'hello\n world\n'
+ Note that tabs and spaces are both treated as whitespace, but they
+ are not equal: the lines " hello" and "\thello" are
+ considered to have no common leading whitespace. (This behaviour is
+ new in Python 2.5; older versions of this module incorrectly
+ expanded tabs before searching for common leading whitespace.)
"""
- lines = text.expandtabs().split('\n')
+ # Look for the longest leading string of spaces and tabs common to
+ # all lines.
margin = None
- for line in lines:
- content = line.lstrip()
- if not content:
- continue
- indent = len(line) - len(content)
+ text = _whitespace_only_re.sub('', text)
+ indents = _leading_whitespace_re.findall(text)
+ for indent in indents:
if margin is None:
margin = indent
- else:
- margin = min(margin, indent)
- if margin is not None and margin > 0:
- for i in range(len(lines)):
- lines[i] = lines[i][margin:]
+ # Current line more deeply indented than previous winner:
+ # no change (previous winner is still on top).
+ elif indent.startswith(margin):
+ pass
+
+ # Current line consistent with and no deeper than previous winner:
+ # it's the new winner.
+ elif margin.startswith(indent):
+ margin = indent
- return '\n'.join(lines)
+ # Current line and previous winner have no common whitespace:
+ # there is no margin.
+ else:
+ margin = ""
+ break
+
+ # sanity check (testing/debugging only)
+ if 0 and margin:
+ for line in text.split("\n"):
+ assert not line or line.startswith(margin), \
+ "line = %r, margin = %r" % (line, margin)
+
+ if margin:
+ text = re.sub(r'(?m)^' + margin, '', text)
+ return text
+
+if __name__ == "__main__":
+ #print dedent("\tfoo\n\tbar")
+ #print dedent(" \thello there\n \t how are you?")
+ print dedent("Hello there.\n This is indented.")