diff options
-rw-r--r-- | Lib/difflib.py | 201 |
1 files changed, 200 insertions, 1 deletions
# Patch to Lib/difflib.py (index b09348f..202b815), laid out as plain Python.
#
# Besides the definitions below, the patch
#   * adds two entries to the module docstring:
#         Function context_diff(a, b):
#             For two lists of strings, return a delta in context diff format.
#         Function unified_diff(a, b):
#             For two lists of strings, return a delta in unified diff format.
#   * extends __all__ to:
#         ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
#          'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
#          'unified_diff']


# ---------------------------------------------------------------------------
# class SequenceMatcher: new method.  The patch inserts it after the method
# that ends with "answer.append( ('equal', ai, i, bj, j) ); return answer" and
# before ratio().  It is shown dedented here; inside the class it is indented
# one level.
# ---------------------------------------------------------------------------

def get_grouped_opcodes(self, n=3):
    """ Isolate change clusters by eliminating ranges with no changes.

    Return a generator of groups with up to n lines of context.
    Each group is in the same format as returned by get_opcodes().

    Two empty sequences have no opcodes and therefore produce no groups.
    A comparison whose only opcode is 'equal' produces no groups either.

    >>> from pprint import pprint
    >>> a = map(str, range(1,40))
    >>> b = a[:]
    >>> b[8:8] = ['i']     # Make an insertion
    >>> b[20] += 'x'       # Make a replacement
    >>> b[23:28] = []      # Make a deletion
    >>> b[30] += 'y'       # Make another replacement
    >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
    [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
     [('equal', 16, 19, 17, 20),
      ('replace', 19, 20, 20, 21),
      ('equal', 20, 22, 21, 23),
      ('delete', 22, 27, 23, 23),
      ('equal', 27, 30, 23, 26)],
     [('equal', 31, 34, 27, 30),
      ('replace', 34, 35, 30, 31),
      ('equal', 35, 38, 31, 34)]]
    """

    # Work on a private copy: the first and last entries are rewritten below,
    # and that must never alter a list the matcher may hand out again from
    # get_opcodes().
    codes = list(self.get_opcodes())
    if not codes:
        # Nothing to compare (both sequences empty); indexing codes[0]
        # would raise IndexError.
        return

    # Fixup leading and trailing groups if they show no changes.
    if codes[0][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[0]
        codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
    if codes[-1][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[-1]
        codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)

    nn = n + n
    group = []
    for tag, i1, i2, j1, j2 in codes:
        # End the current group and start a new one whenever
        # there is a large range with no changes.
        if tag == 'equal' and i2-i1 > nn:
            # Trailing context of the group being closed ...
            group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
            yield group
            group = []
            # ... and leading context of the group being opened.
            i1, j1 = max(i1, i2-n), max(j1, j2-n)
        group.append((tag, i1, i2, j1, j2))
    # A lone 'equal' group carries no change, so it is not reported.
    if group and not (len(group) == 1 and group[0][0] == 'equal'):
        yield group

# (patch context: SequenceMatcher.ratio() follows, unchanged)


# ---------------------------------------------------------------------------
# Module level: new functions.  The patch inserts them after the "del re"
# that follows IS_CHARACTER_JUNK() and before ndiff().
# ---------------------------------------------------------------------------

def _format_range_unified(start, stop):
    # Render the 0-based half-open range [start, stop) as the "start,count"
    # field of a unified diff hunk header.
    beginning = start + 1       # line numbers are 1-based
    length = stop - start
    if not length:
        # An empty range is reported at the line just before it
        # (0 when it sits at the very start of the sequence).
        beginning -= 1
    return '%d,%d' % (beginning, length)


def _format_range_context(start, stop):
    # Render the 0-based half-open range [start, stop) as the "first,last"
    # field of a context diff hunk header.  Ranges shorter than two lines
    # collapse to the single number `stop`.
    if stop - start >= 2:
        return '%d,%d' % (start + 1, stop)
    return '%d' % stop


def _group_has_tag(group, tags):
    # True if any opcode in `group` carries one of the given tags.
    for opcode in group:
        if opcode[0] in tags:
            return True
    return False


def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    In each "@@ -start,count +start,count @@" line an empty range is
    reported at the line just before it (0 at the start of a sequence).
    Nothing at all is generated when the two sequences are equal.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print line
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """

    started = False
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is a change.
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        first, last = group[0], group[-1]
        yield "@@ -%s +%s @@%s" % (_format_range_unified(first[1], last[2]),
                                   _format_range_unified(first[3], last[4]),
                                   lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A 'replace' shows the removed lines first, then the added ones.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line


# See http://www.unix.org/single_unix_specification/
def context_diff(a, b, fromfile='', tofile='',
                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a context diff.

    Context diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with *** or ---) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The context diff format normally has a header for filenames and
    modification times.  Any or all of these may be specified using
    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
    The modification times are normally expressed in the format returned
    by time.ctime().  If not specified, the strings default to blanks.

    Example:

    >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
    *** Original Sat Jan 26 23:30:50 1991
    --- Current Fri Jun 06 10:22:46 2003
    ***************
    *** 1,4 ****
      one
    ! two
    ! three
      four
    --- 1,4 ----
    + zero
      one
    ! tree
      four
    """

    started = False
    # Every body line starts with a two-character marker.
    prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is a change.
            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        first, last = group[0], group[-1]

        yield '***************%s' % (lineterm,)

        # "From" half: its lines are listed only if something was removed
        # or replaced; otherwise just the range line appears.
        yield '*** %s ****%s' % (_format_range_context(first[1], last[2]),
                                 lineterm)
        if _group_has_tag(group, ('replace', 'delete')):
            for tag, i1, i2, j1, j2 in group:
                if tag != 'insert':
                    for line in a[i1:i2]:
                        yield prefixmap[tag] + line

        # "To" half: its lines are listed only if something was added
        # or replaced.
        yield '--- %s ----%s' % (_format_range_context(first[3], last[4]),
                                 lineterm)
        if _group_has_tag(group, ('replace', 'insert')):
            for tag, i1, i2, j1, j2 in group:
                if tag != 'delete':
                    for line in b[j1:j2]:
                        yield prefixmap[tag] + line

# (patch context: the existing definition
#  ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK) follows, unchanged)