summaryrefslogtreecommitdiffstats
path: root/Lib/difflib.py
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2006-06-14 04:09:25 (GMT)
committerTim Peters <tim.peters@gmail.com>2006-06-14 04:09:25 (GMT)
commit43898b4f642acf182242744181143141228ca56e (patch)
tree7698daa8590f35042e112618973b26d2f28ab8b1 /Lib/difflib.py
parent5d7c290b7ba9353fa443e175fe02112aa53f21b5 (diff)
downloadcpython-43898b4f642acf182242744181143141228ca56e.zip
cpython-43898b4f642acf182242744181143141228ca56e.tar.gz
cpython-43898b4f642acf182242744181143141228ca56e.tar.bz2
SequenceMatcher.get_matching_blocks(): This now guarantees that
adjacent triples in the result list describe non-adjacent matching blocks. That's _nice_ to have, and Guido said he wanted it. Not a bugfix candidate: Guido or not ;-), this changes visible endcase semantics (note that some tests had to change), and nothing about this was documented before. Since it was working as designed, and behavior was consistent with the docs, it wasn't "a bug".
Diffstat (limited to 'Lib/difflib.py')
-rw-r--r--Lib/difflib.py42
1 files changed, 34 insertions, 8 deletions
diff --git a/Lib/difflib.py b/Lib/difflib.py
index 9a90710..347b647 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -86,8 +86,7 @@ class SequenceMatcher:
>>> for block in s.get_matching_blocks():
... print "a[%d] and b[%d] match for %d elements" % block
a[0] and b[0] match for 8 elements
- a[8] and b[17] match for 6 elements
- a[14] and b[23] match for 15 elements
+ a[8] and b[17] match for 21 elements
a[29] and b[38] match for 0 elements
Note that the last tuple returned by .get_matching_blocks() is always a
@@ -101,8 +100,7 @@ class SequenceMatcher:
... print "%6s a[%d:%d] b[%d:%d]" % opcode
equal a[0:8] b[0:8]
insert a[8:8] b[8:17]
- equal a[8:14] b[17:23]
- equal a[14:29] b[23:38]
+ equal a[8:29] b[17:38]
See the Differ class for a fancy human-friendly file differencer, which
uses SequenceMatcher both to compare sequences of lines, and to compare
@@ -461,7 +459,11 @@ class SequenceMatcher:
Each triple is of the form (i, j, n), and means that
a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
- i and in j.
+ i and in j. New in Python 2.5, it's also guaranteed that if
+ (i, j, n) and (i', j', n') are adjacent triples in the list, and
+ the second is not the last triple in the list, then i+n != i' or
+ j+n != j'. IOW, adjacent triples never describe adjacent equal
+ blocks.
The last triple is a dummy, (len(a), len(b), 0), and is the only
triple with n==0.
@@ -474,7 +476,6 @@ class SequenceMatcher:
if self.matching_blocks is not None:
return self.matching_blocks
la, lb = len(self.a), len(self.b)
- self.matching_blocks = matching_blocks = []
# This is most naturally expressed as a recursive algorithm, but
# at least one user bumped into extreme use cases that exceeded
@@ -483,6 +484,7 @@ class SequenceMatcher:
# results to `matching_blocks` in a loop; the matches are sorted
# at the end.
queue = [(0, la, 0, lb)]
+ matching_blocks = []
while queue:
alo, ahi, blo, bhi = queue.pop()
i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
@@ -496,8 +498,32 @@ class SequenceMatcher:
if i+k < ahi and j+k < bhi:
queue.append((i+k, ahi, j+k, bhi))
matching_blocks.sort()
- matching_blocks.append( (la, lb, 0) )
- return matching_blocks
+
+ # It's possible that we have adjacent equal blocks in the
+ # matching_blocks list now. Starting with 2.5, this code was added
+ # to collapse them.
+ i1 = j1 = k1 = 0
+ non_adjacent = []
+ for i2, j2, k2 in matching_blocks:
+ # Is this block adjacent to i1, j1, k1?
+ if i1 + k1 == i2 and j1 + k1 == j2:
+ # Yes, so collapse them -- this just increases the length of
+ # the first block by the length of the second, and the first
+ # block so lengthebd remains the block to compare against.
+ k1 += k2
+ else:
+ # Not adjacent. Remember the first block (k1==0 means it's
+ # the dummy we started with), and make the second block the
+ # new block to compare against.
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+ i1, j1, k1 = i2, j2, k2
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+
+ non_adjacent.append( (la, lb, 0) )
+ self.matching_blocks = non_adjacent
+ return self.matching_blocks
def get_opcodes(self):
"""Return list of 5-tuples describing how to turn a into b.