From acc9f3fb1622c0685b52097b977cafada742be99 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 15 Oct 2013 21:22:54 +0300 Subject: Issue #18725: The textwrap module now supports truncating multiline text. --- Doc/library/textwrap.rst | 28 +++++++------ Lib/test/test_textwrap.py | 105 ++++++++++++++++++++++++++++++++++++++++++---- Lib/textwrap.py | 88 +++++++++++++++++++++----------------- Misc/NEWS | 2 + 4 files changed, 165 insertions(+), 58 deletions(-) diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst index 6ac1e77..1ba42a3 100644 --- a/Doc/library/textwrap.rst +++ b/Doc/library/textwrap.rst @@ -250,6 +250,22 @@ hyphenated words; only then will long words be broken if necessary, unless was to always allow breaking hyphenated words. + .. attribute:: max_lines + + (default: ``None``) If not ``None``, then the text will be truncated to + *max_lines* lines. + + .. versionadded:: 3.4 + + + .. attribute:: placeholder + + (default: ``' [...]'``) String that will be appended to the last line of + text if it is truncated. + + .. versionadded:: 3.4 + + :class:`TextWrapper` also provides some public methods, analogous to the module-level convenience functions: @@ -266,15 +282,3 @@ hyphenated words; only then will long words be broken if necessary, unless Wraps the single paragraph in *text*, and returns a single string containing the wrapped paragraph. - - - .. function:: shorten(text, *, placeholder=" [...]") - - Collapse and truncate the given text to fit in :attr:`width` - characters. - - The text first has its whitespace collapsed. If it then fits in - :attr:`width`, it is returned as-is. Otherwise, as many words - as possible are joined and then the *placeholder* is appended. - - .. 
versionadded:: 3.4 diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index 36c15cc..1bba77e 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -42,10 +42,6 @@ class BaseTestCase(unittest.TestCase): "\nexpected %r\n" "but got %r" % (expect, result)) - def check_shorten(self, text, width, expect, **kwargs): - result = shorten(text, width, **kwargs) - self.check(result, expect) - class WrapTestCase(BaseTestCase): @@ -433,6 +429,90 @@ What a mess! self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"]) +class MaxLinesTestCase(BaseTestCase): + text = "Hello there, how are you this fine day? I'm glad to hear it!" + + def test_simple(self): + self.check_wrap(self.text, 12, + ["Hello [...]"], + max_lines=0) + self.check_wrap(self.text, 12, + ["Hello [...]"], + max_lines=1) + self.check_wrap(self.text, 12, + ["Hello there,", + "how [...]"], + max_lines=2) + self.check_wrap(self.text, 13, + ["Hello there,", + "how are [...]"], + max_lines=2) + self.check_wrap(self.text, 80, [self.text], max_lines=1) + self.check_wrap(self.text, 12, + ["Hello there,", + "how are you", + "this fine", + "day? I'm", + "glad to hear", + "it!"], + max_lines=6) + + def test_spaces(self): + # strip spaces before placeholder + self.check_wrap(self.text, 12, + ["Hello there,", + "how are you", + "this fine", + "day? [...]"], + max_lines=4) + # placeholder at the start of line + self.check_wrap(self.text, 6, + ["Hello", + "[...]"], + max_lines=2) + # final spaces + self.check_wrap(self.text + ' ' * 10, 12, + ["Hello there,", + "how are you", + "this fine", + "day? 
I'm", + "glad to hear", + "it!"], + max_lines=6) + + def test_placeholder(self): + self.check_wrap(self.text, 12, + ["Hello..."], + max_lines=1, + placeholder='...') + self.check_wrap(self.text, 12, + ["Hello there,", + "how are..."], + max_lines=2, + placeholder='...') + # long placeholder and indentation + with self.assertRaises(ValueError): + wrap(self.text, 16, initial_indent=' ', + max_lines=1, placeholder=' [truncated]...') + with self.assertRaises(ValueError): + wrap(self.text, 16, subsequent_indent=' ', + max_lines=2, placeholder=' [truncated]...') + self.check_wrap(self.text, 16, + [" Hello there,", + " [truncated]..."], + max_lines=2, + initial_indent=' ', + subsequent_indent=' ', + placeholder=' [truncated]...') + self.check_wrap(self.text, 16, + [" [truncated]..."], + max_lines=1, + initial_indent=' ', + subsequent_indent=' ', + placeholder=' [truncated]...') + self.check_wrap(self.text, 80, [self.text], placeholder='.' * 1000) + + class LongWordTestCase (BaseTestCase): def setUp(self): self.wrapper = TextWrapper() @@ -493,6 +573,14 @@ How *do* you spell that odd word, anyways? result = wrap(self.text, width=30, break_long_words=0) self.check(result, expect) + def test_max_lines_long(self): + self.check_wrap(self.text, 12, + ['Did you say ', + '"supercalifr', + 'agilisticexp', + '[...]'], + max_lines=4) + class IndentTestCases(BaseTestCase): @@ -782,6 +870,10 @@ class IndentTestCase(unittest.TestCase): class ShortenTestCase(BaseTestCase): + def check_shorten(self, text, width, expect, **kwargs): + result = shorten(text, width, **kwargs) + self.check(result, expect) + def test_simple(self): # Simple case: just words, spaces, and a bit of punctuation text = "Hello there, how are you this fine day? I'm glad to hear it!" @@ -825,10 +917,9 @@ class ShortenTestCase(BaseTestCase): self.check_shorten("hello world! 
", 10, "[...]") def test_width_too_small_for_placeholder(self): - wrapper = TextWrapper(width=8) - wrapper.shorten("x" * 20, placeholder="(......)") + shorten("x" * 20, width=8, placeholder="(......)") with self.assertRaises(ValueError): - wrapper.shorten("x" * 20, placeholder="(.......)") + shorten("x" * 20, width=8, placeholder="(.......)") def test_first_word_too_long_but_placeholder_fits(self): self.check_shorten("Helloo", 5, "[...]") diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 27ebc16..15a7534 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -19,8 +19,6 @@ __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent'] # since 0xa0 is not in range(128). _whitespace = '\t\n\x0b\x0c\r ' -_default_placeholder = ' [...]' - class TextWrapper: """ Object for wrapping/filling text. The public interface consists of @@ -64,6 +62,10 @@ class TextWrapper: compound words. drop_whitespace (default: true) Drop leading and trailing whitespace from lines. + max_lines (default: None) + Truncate wrapped lines. + placeholder (default: ' [...]') + Append to the last line of truncated text. 
""" unicode_whitespace_trans = {} @@ -106,7 +108,10 @@ class TextWrapper: break_long_words=True, drop_whitespace=True, break_on_hyphens=True, - tabsize=8): + tabsize=8, + *, + max_lines=None, + placeholder=' [...]'): self.width = width self.initial_indent = initial_indent self.subsequent_indent = subsequent_indent @@ -117,6 +122,8 @@ class TextWrapper: self.drop_whitespace = drop_whitespace self.break_on_hyphens = break_on_hyphens self.tabsize = tabsize + self.max_lines = max_lines + self.placeholder = placeholder # -- Private methods ----------------------------------------------- @@ -225,6 +232,13 @@ class TextWrapper: lines = [] if self.width <= 0: raise ValueError("invalid width %r (must be > 0)" % self.width) + if self.max_lines is not None: + if self.max_lines > 1: + indent = self.subsequent_indent + else: + indent = self.initial_indent + if len(indent) + len(self.placeholder.lstrip()) > self.width: + raise ValueError("placeholder too large for max width") # Arrange in reverse order so items can be efficiently popped # from a stack of chucks. @@ -267,15 +281,41 @@ class TextWrapper: # fit on *any* line (not just this one). if chunks and len(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) + cur_len = sum(map(len, cur_line)) # If the last chunk on this line is all whitespace, drop it. if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': + cur_len -= len(cur_line[-1]) del cur_line[-1] - # Convert current line back to a string and store it in list - # of all lines (return value). if cur_line: - lines.append(indent + ''.join(cur_line)) + if (self.max_lines is None or + len(lines) + 1 < self.max_lines or + (not chunks or + self.drop_whitespace and + len(chunks) == 1 and + not chunks[0].strip()) and cur_len <= width): + # Convert current line back to a string and store it in + # list of all lines (return value). 
+ lines.append(indent + ''.join(cur_line)) + else: + while cur_line: + if (cur_line[-1].strip() and + cur_len + len(self.placeholder) <= width): + cur_line.append(self.placeholder) + lines.append(indent + ''.join(cur_line)) + break + cur_len -= len(cur_line[-1]) + del cur_line[-1] + else: + if lines: + prev_line = lines[-1].rstrip() + if (len(prev_line) + len(self.placeholder) <= + self.width): + lines[-1] = prev_line + self.placeholder + break + lines.append(indent + self.placeholder.lstrip()) + break return lines @@ -308,36 +348,6 @@ class TextWrapper: """ return "\n".join(self.wrap(text)) - def shorten(self, text, *, placeholder=_default_placeholder): - """shorten(text: str) -> str - - Collapse and truncate the given text to fit in 'self.width' columns. - """ - max_length = self.width - if max_length < len(placeholder.strip()): - raise ValueError("placeholder too large for max width") - sep = ' ' - sep_len = len(sep) - parts = [] - cur_len = 0 - chunks = self._split_chunks(text) - for chunk in chunks: - if not chunk.strip(): - continue - chunk_len = len(chunk) + sep_len if parts else len(chunk) - if cur_len + chunk_len > max_length: - break - parts.append(chunk) - cur_len += chunk_len - else: - # No truncation necessary - return sep.join(parts) - max_truncated_length = max_length - len(placeholder) - while parts and cur_len > max_truncated_length: - last = parts.pop() - cur_len -= len(last) + sep_len - return (sep.join(parts) + placeholder).strip() - # -- Convenience interface --------------------------------------------- @@ -366,7 +376,7 @@ def fill(text, width=70, **kwargs): w = TextWrapper(width=width, **kwargs) return w.fill(text) -def shorten(text, width, *, placeholder=_default_placeholder, **kwargs): +def shorten(text, width, **kwargs): """Collapse and truncate the given text to fit in the given width. The text first has its whitespace collapsed. 
If it then fits in @@ -378,8 +388,8 @@ def shorten(text, width, *, placeholder=_default_placeholder, **kwargs): >>> textwrap.shorten("Hello world!", width=11) 'Hello [...]' """ - w = TextWrapper(width=width, **kwargs) - return w.shorten(text, placeholder=placeholder) + w = TextWrapper(width=width, max_lines=1, **kwargs) + return w.fill(' '.join(text.strip().split())) # -- Loosely related functionality ------------------------------------- diff --git a/Misc/NEWS b/Misc/NEWS index 4ee5a58..88c9bc3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -42,6 +42,8 @@ Core and Builtins Library ------- +- Issue #18725: The textwrap module now supports truncating multiline text. + - Issue #18776: atexit callbacks now display their full traceback when they raise an exception. -- cgit v0.12