summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/difflib.py 10
-rw-r--r-- Lib/test/test_difflib.py 48
2 files changed, 55 insertions, 3 deletions
diff --git a/Lib/difflib.py b/Lib/difflib.py
index f2215d8..0dda80d 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -130,7 +130,7 @@ class SequenceMatcher:
set_seq2(b)
Set the second sequence to be compared.
- find_longest_match(alo, ahi, blo, bhi)
+ find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
Find longest matching block in a[alo:ahi] and b[blo:bhi].
get_matching_blocks()
@@ -334,9 +334,11 @@ class SequenceMatcher:
for elt in popular: # ditto; as fast for 1% deletion
del b2j[elt]
- def find_longest_match(self, alo, ahi, blo, bhi):
+ def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].
+ By default it will find the longest match in the entirety of a and b.
+
If isjunk is not defined:
Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
@@ -391,6 +393,10 @@ class SequenceMatcher:
# the unique 'b's and then matching the first two 'a's.
a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
+ if ahi is None:
+ ahi = len(a)
+ if bhi is None:
+ bhi = len(b)
besti, bestj, bestsize = alo, blo, 0
# find longest junk-free match
# during an iteration of the loop, j2len[j] = length of longest
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py
index 5e2ca1a..42ac1fd 100644
--- a/Lib/test/test_difflib.py
+++ b/Lib/test/test_difflib.py
@@ -501,12 +501,58 @@ class TestJunkAPIs(unittest.TestCase):
for char in ['a', '#', '\n', '\f', '\r', '\v']:
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
+class TestFindLongest(unittest.TestCase):
+ def longer_match_exists(self, a, b, n):  # True if any (n + 1)-long slice of b occurs in a
+ return any(b_part in a for b_part in  # slices start at 0 .. len(b) - n - 1 inclusive
+ [b[i:i + n + 1] for i in range(0, len(b) - n)])
+
+ def test_default_args(self):
+ a = 'foo bar'
+ b = 'foo baz bar'
+ sm = difflib.SequenceMatcher(a=a, b=b)
+ match = sm.find_longest_match()
+ self.assertEqual(match.a, 0)
+ self.assertEqual(match.b, 0)
+ self.assertEqual(match.size, 6)
+ self.assertEqual(a[match.a: match.a + match.size],
+ b[match.b: match.b + match.size])
+ self.assertFalse(self.longer_match_exists(a, b, match.size))
+
+ match = sm.find_longest_match(alo=2, blo=4)
+ self.assertEqual(match.a, 3)
+ self.assertEqual(match.b, 7)
+ self.assertEqual(match.size, 4)
+ self.assertEqual(a[match.a: match.a + match.size],
+ b[match.b: match.b + match.size])
+ self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
+
+ match = sm.find_longest_match(bhi=5, blo=1)
+ self.assertEqual(match.a, 1)
+ self.assertEqual(match.b, 1)
+ self.assertEqual(match.size, 4)
+ self.assertEqual(a[match.a: match.a + match.size],
+ b[match.b: match.b + match.size])
+ self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
+
+ def test_longest_match_with_popular_chars(self):
+ a = 'dabcd'
+ b = 'd'*100 + 'abc' + 'd'*100 # length over 200 so popular used
+ sm = difflib.SequenceMatcher(a=a, b=b)
+ match = sm.find_longest_match(0, len(a), 0, len(b))
+ self.assertEqual(match.a, 0)
+ self.assertEqual(match.b, 99)
+ self.assertEqual(match.size, 5)
+ self.assertEqual(a[match.a: match.a + match.size],
+ b[match.b: match.b + match.size])
+ self.assertFalse(self.longer_match_exists(a, b, match.size))
+
+
def test_main():
difflib.HtmlDiff._default_prefix = 0
Doctests = doctest.DocTestSuite(difflib)
run_unittest(
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
- TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
+ TestOutputFormat, TestBytes, TestJunkAPIs, TestFindLongest, Doctests)
if __name__ == '__main__':
test_main()