From 3209cbd99b6d65aa18b3beb124fac9c792b8993d Mon Sep 17 00:00:00 2001 From: lrjball <50599110+lrjball@users.noreply.github.com> Date: Thu, 30 Apr 2020 04:42:45 +0100 Subject: bpo-40394 - difflib.SequenceMatcher.find_longest_match default args (GH-19742) * bpo-40394 - difflib.SequenceMatcher.find_longest_match default args Added default args to find_longest_match, as well as related tests. --- Doc/library/difflib.rst | 5 ++- Lib/difflib.py | 10 ++++- Lib/test/test_difflib.py | 48 +++++++++++++++++++++- Misc/ACKS | 1 + .../2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst | 1 + 5 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst index ada311b..7a898c2 100644 --- a/Doc/library/difflib.rst +++ b/Doc/library/difflib.rst @@ -421,7 +421,7 @@ The :class:`SequenceMatcher` class has this constructor: is not changed. - .. method:: find_longest_match(alo, ahi, blo, bhi) + .. method:: find_longest_match(alo=0, ahi=None, blo=0, bhi=None) Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``. @@ -458,6 +458,9 @@ The :class:`SequenceMatcher` class has this constructor: This method returns a :term:`named tuple` ``Match(a, b, size)``. + .. versionchanged:: 3.9 + Added default arguments. + .. method:: get_matching_blocks() diff --git a/Lib/difflib.py b/Lib/difflib.py index f2215d8..0dda80d 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -130,7 +130,7 @@ class SequenceMatcher: set_seq2(b) Set the second sequence to be compared. - find_longest_match(alo, ahi, blo, bhi) + find_longest_match(alo=0, ahi=None, blo=0, bhi=None) Find longest matching block in a[alo:ahi] and b[blo:bhi]. 
get_matching_blocks() @@ -334,9 +334,11 @@ class SequenceMatcher: for elt in popular: # ditto; as fast for 1% deletion del b2j[elt] - def find_longest_match(self, alo, ahi, blo, bhi): + def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None): """Find longest matching block in a[alo:ahi] and b[blo:bhi]. + By default it will find the longest match in the entirety of a and b. + If isjunk is not defined: Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where @@ -391,6 +393,10 @@ class SequenceMatcher: # the unique 'b's and then matching the first two 'a's. a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__ + if ahi is None: + ahi = len(a) + if bhi is None: + bhi = len(b) besti, bestj, bestsize = alo, blo, 0 # find longest junk-free match # during an iteration of the loop, j2len[j] = length of longest diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 5e2ca1a..42ac1fd 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -501,12 +501,58 @@ class TestJunkAPIs(unittest.TestCase): for char in ['a', '#', '\n', '\f', '\r', '\v']: self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char)) +class TestFindLongest(unittest.TestCase): + def longer_match_exists(self, a, b, n): + return any(b_part in a for b_part in + [b[i:i + n + 1] for i in range(0, len(b) - n - 1)]) + + def test_default_args(self): + a = 'foo bar' + b = 'foo baz bar' + sm = difflib.SequenceMatcher(a=a, b=b) + match = sm.find_longest_match() + self.assertEqual(match.a, 0) + self.assertEqual(match.b, 0) + self.assertEqual(match.size, 6) + self.assertEqual(a[match.a: match.a + match.size], + b[match.b: match.b + match.size]) + self.assertFalse(self.longer_match_exists(a, b, match.size)) + + match = sm.find_longest_match(alo=2, blo=4) + self.assertEqual(match.a, 3) + self.assertEqual(match.b, 7) + self.assertEqual(match.size, 4) + self.assertEqual(a[match.a: match.a + match.size], + b[match.b: match.b + match.size]) + 
self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size)) + + match = sm.find_longest_match(bhi=5, blo=1) + self.assertEqual(match.a, 1) + self.assertEqual(match.b, 1) + self.assertEqual(match.size, 4) + self.assertEqual(a[match.a: match.a + match.size], + b[match.b: match.b + match.size]) + self.assertFalse(self.longer_match_exists(a, b[1:5], match.size)) + + def test_longest_match_with_popular_chars(self): + a = 'dabcd' + b = 'd'*100 + 'abc' + 'd'*100 # length over 200 so popular used + sm = difflib.SequenceMatcher(a=a, b=b) + match = sm.find_longest_match(0, len(a), 0, len(b)) + self.assertEqual(match.a, 0) + self.assertEqual(match.b, 99) + self.assertEqual(match.size, 5) + self.assertEqual(a[match.a: match.a + match.size], + b[match.b: match.b + match.size]) + self.assertFalse(self.longer_match_exists(a, b, match.size)) + + def test_main(): difflib.HtmlDiff._default_prefix = 0 Doctests = doctest.DocTestSuite(difflib) run_unittest( TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs, - TestOutputFormat, TestBytes, TestJunkAPIs, Doctests) + TestOutputFormat, TestBytes, TestJunkAPIs, TestFindLongest, Doctests) if __name__ == '__main__': test_main() diff --git a/Misc/ACKS b/Misc/ACKS index 89f37e5..21822dd 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -88,6 +88,7 @@ Dwayne Bailey Stig Bakken Aleksandr Balezin Greg Ball +Lewis Ball Luigi Ballabio Thomas Ballinger Jeff Balogh diff --git a/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst b/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst new file mode 100644 index 0000000..ef2e239 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-04-28-18-59-48.bpo-40394.Yi5uuM.rst @@ -0,0 +1 @@ +Added default arguments to :meth:`difflib.SequenceMatcher.find_longest_match()`. \ No newline at end of file -- cgit v0.12