From b81c833ab51fb7d7f0f8eaace37f60ef7455aa85 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 18 Oct 2020 18:01:15 +0100 Subject: bpo-28660: Make TextWrapper break long words on hyphens (GH-22721) --- Lib/test/test_textwrap.py | 72 ++++++++++++++++++++++ Lib/textwrap.py | 12 +++- .../2020-10-16-16-08-04.bpo-28660.eX9pvD.rst | 1 + 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index ed97f70..dfbc2b9 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -640,6 +640,78 @@ How *do* you spell that odd word, anyways? max_lines=4) +class LongWordWithHyphensTestCase(BaseTestCase): + def setUp(self): + self.wrapper = TextWrapper() + self.text1 = '''\ +We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase. +''' + self.text2 = '''\ +1234567890-1234567890--this_is_a_very_long_option_indeed-good-bye" +''' + + def test_break_long_words_on_hyphen(self): + expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-', + 'cyclohexadiene-1-carboxylate synthase.'] + self.check_wrap(self.text1, 50, expected) + + expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-', + 'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.'] + self.check_wrap(self.text1, 10, expected) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) + + def test_break_long_words_not_on_hyphen(self): + expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad', + 'iene-1-carboxylate synthase.'] + self.check_wrap(self.text1, 50, expected, break_on_hyphens=False) + + expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,', + '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.'] + self.check_wrap(self.text1, 10, expected, break_on_hyphens=False) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) + + def test_break_on_hyphen_but_not_long_words(self): + expected = ['We used enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + + self.check_wrap(self.text1, 50, expected, break_long_words=False) + + expected = ['We used', 'enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text1, 10, expected, break_long_words=False) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) + + + def test_do_not_break_long_words_or_on_hyphens(self): + expected = ['We used enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text1, 50, expected, + break_long_words=False, + break_on_hyphens=False) + + expected = ['We used', 'enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text1, 10, expected, + break_long_words=False, + break_on_hyphens=False) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) + class IndentTestCases(BaseTestCase): # called before each test method diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 30e693c..841de9b 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -215,8 +215,16 @@ class TextWrapper: # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. if self.break_long_words: - cur_line.append(reversed_chunks[-1][:space_left]) - reversed_chunks[-1] = reversed_chunks[-1][space_left:] + end = space_left + chunk = reversed_chunks[-1] + if self.break_on_hyphens and len(chunk) > space_left: + # break after last hyphen, but only if there are + # non-hyphens before it + hyphen = chunk.rfind('-', 0, space_left) + if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]): + end = hyphen + 1 + cur_line.append(chunk[:end]) + reversed_chunks[-1] = chunk[end:] # Otherwise, we have to preserve the long word intact. Only add # it to the current line if there's nothing already there -- diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst new file mode 100644 index 0000000..d679934 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst @@ -0,0 +1 @@ +:func:`textwrap.wrap` now attempts to break long words after hyphens when ``break_long_words=True`` and ``break_on_hyphens=True``. -- cgit v0.12