summaryrefslogtreecommitdiffstats
path: root/Lib/difflib.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/difflib.py')
-rw-r--r--Lib/difflib.py42
1 files changed, 34 insertions, 8 deletions
diff --git a/Lib/difflib.py b/Lib/difflib.py
index 9a90710..347b647 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -86,8 +86,7 @@ class SequenceMatcher:
>>> for block in s.get_matching_blocks():
... print "a[%d] and b[%d] match for %d elements" % block
a[0] and b[0] match for 8 elements
- a[8] and b[17] match for 6 elements
- a[14] and b[23] match for 15 elements
+ a[8] and b[17] match for 21 elements
a[29] and b[38] match for 0 elements
Note that the last tuple returned by .get_matching_blocks() is always a
@@ -101,8 +100,7 @@ class SequenceMatcher:
... print "%6s a[%d:%d] b[%d:%d]" % opcode
equal a[0:8] b[0:8]
insert a[8:8] b[8:17]
- equal a[8:14] b[17:23]
- equal a[14:29] b[23:38]
+ equal a[8:29] b[17:38]
See the Differ class for a fancy human-friendly file differencer, which
uses SequenceMatcher both to compare sequences of lines, and to compare
@@ -461,7 +459,11 @@ class SequenceMatcher:
Each triple is of the form (i, j, n), and means that
a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
- i and in j.
+ i and in j. New in Python 2.5, it's also guaranteed that if
+ (i, j, n) and (i', j', n') are adjacent triples in the list, and
+ the second is not the last triple in the list, then i+n != i' or
+ j+n != j'. IOW, adjacent triples never describe adjacent equal
+ blocks.
The last triple is a dummy, (len(a), len(b), 0), and is the only
triple with n==0.
@@ -474,7 +476,6 @@ class SequenceMatcher:
if self.matching_blocks is not None:
return self.matching_blocks
la, lb = len(self.a), len(self.b)
- self.matching_blocks = matching_blocks = []
# This is most naturally expressed as a recursive algorithm, but
# at least one user bumped into extreme use cases that exceeded
@@ -483,6 +484,7 @@ class SequenceMatcher:
# results to `matching_blocks` in a loop; the matches are sorted
# at the end.
queue = [(0, la, 0, lb)]
+ matching_blocks = []
while queue:
alo, ahi, blo, bhi = queue.pop()
i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
@@ -496,8 +498,32 @@ class SequenceMatcher:
if i+k < ahi and j+k < bhi:
queue.append((i+k, ahi, j+k, bhi))
matching_blocks.sort()
- matching_blocks.append( (la, lb, 0) )
- return matching_blocks
+
+ # It's possible that we have adjacent equal blocks in the
+ # matching_blocks list now. Starting with 2.5, this code was added
+ # to collapse them.
+ i1 = j1 = k1 = 0
+ non_adjacent = []
+ for i2, j2, k2 in matching_blocks:
+ # Is this block adjacent to i1, j1, k1?
+ if i1 + k1 == i2 and j1 + k1 == j2:
+ # Yes, so collapse them -- this just increases the length of
+ # the first block by the length of the second, and the first
+ # block so lengthebd remains the block to compare against.
+ k1 += k2
+ else:
+ # Not adjacent. Remember the first block (k1==0 means it's
+ # the dummy we started with), and make the second block the
+ # new block to compare against.
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+ i1, j1, k1 = i2, j2, k2
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+
+ non_adjacent.append( (la, lb, 0) )
+ self.matching_blocks = non_adjacent
+ return self.matching_blocks
def get_opcodes(self):
"""Return list of 5-tuples describing how to turn a into b.