From e064b41f5ac046fc361fa80af551f5bfab01141c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sun, 29 Aug 2004 16:34:40 +0000 Subject: Patch #914575: difflib side by side diff support, diff.py s/b/s HTML option. --- Doc/lib/libdifflib.tex | 73 ++++ Lib/difflib.py | 679 +++++++++++++++++++++++++++++++++++++- Lib/test/test_difflib.py | 128 ++++++- Lib/test/test_difflib_expect.html | 526 +++++++++++++++++++++++++++++ Misc/ACKS | 1 + Misc/NEWS | 4 + Tools/scripts/diff.py | 8 +- 7 files changed, 1412 insertions(+), 7 deletions(-) create mode 100644 Lib/test/test_difflib_expect.html diff --git a/Doc/lib/libdifflib.tex b/Doc/lib/libdifflib.tex index fc588ff..4256e87 100644 --- a/Doc/lib/libdifflib.tex +++ b/Doc/lib/libdifflib.tex @@ -52,6 +52,79 @@ characters. \end{classdesc*} +\begin{classdesc*}{HtmlDiff} + + This class can be used to create an HTML table (or a complete HTML file + containing the table) showing a side by side, line by line comparision + of text with inter-line and intra-line change highlights. The table can + be generated in either full or contextual difference mode. + + The constructor for this class is: + + \begin{funcdesc}{__init__}{ + \optional{, tabsize + \optional{, wrapcolumn + \optional{, linejunk + \optional{, charjunk}}}}} + + Initializes instance of \class{HtmlDiff}. + + \var{tabsize} is an optional keyword argument to specify tab stop spacing + and defaults to \code{8}. + + \var{wrapcolumn} is an optional keyword to specify column number where + lines are broken and wrapped, defaults to \code{None} where lines are not + wrapped. + + \var{linejunk} and \var{charjunk} are optional keyword arguments passed + into \code{ndiff()} (used to by \class{HtmlDiff} to generate the + side by side HTML differences). See \code{ndiff()} documentation for + argument default values and descriptions. + \end{funcdesc} + + The following methods are public: + + \begin{funcdesc}{make_file}{fromlines, tolines + \optional{, fromdesc + \optional{, todesc + \optional{, context + \optional{, numlines}}}}} + Compares \var{fromlines} and \var{tolines} (lists of strings) and returns + a string which is a complete HTML file containing a table showing line by + line differences with inter-line and intra-line changes highlighted. + + \var{fromdesc} and \var{todesc} are optional keyword arguments to specify + from/to file column header strings (both default to an empty string). + + \var{context} and \var{numlines} are both optional keyword arguments. + Set \var{context} to \code{True} when contextual differences are to be + shown, else the default is \code{False} to show the full files. + \var{numlines} defaults to \code{5}. When \var{context} is \code{True} + \var{numlines} controls the number of context lines which surround the + difference highlights. When \var{context} is \code{False} \var{numlines} + controls the number of lines which are shown before a difference + highlight when using the "next" hyperlinks (setting to zero would cause + the "next" hyperlinks to place the next difference highlight at the top of + the browser without any leading context). + \end{funcdesc} + + \begin{funcdesc}{make_table}{fromlines, tolines + \optional{, fromdesc + \optional{, todesc + \optional{, context}}}} + Compares \var{fromlines} and \var{tolines} (lists of strings) and returns + a string which is a complete HTML table showing line by line differences + with inter-line and intra-line changes highlighted. + + The arguments of this method are a subset of those for the + \code{make_file} method. Refer to the \code{make_file} method + documentation. + \end{funcdesc} + + \file{Tools/scripts/ndiff.py} is a command-line front-end to this class + and contains a good example of its use. +\end{classdesc*} + \begin{funcdesc}{context_diff}{a, b\optional{, fromfile\optional{, tofile \optional{, fromfiledate\optional{, tofiledate\optional{, n \optional{, lineterm}}}}}}} diff --git a/Lib/difflib.py b/Lib/difflib.py index 7cd5197..aa205f7 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -23,11 +23,14 @@ Class SequenceMatcher: Class Differ: For producing human-readable deltas from sequences of lines of text. + +Class HtmlDiff: + For producing HTML side by side comparison with change highlights. """ __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', - 'unified_diff'] + 'unified_diff', 'HtmlDiff'] import heapq @@ -1101,8 +1104,6 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"): return ch in ws -del re - def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n'): @@ -1277,6 +1278,678 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK): """ return Differ(linejunk, charjunk).compare(a, b) +def _mdiff(fromlines, tolines, context=None, linejunk=None, + charjunk=IS_CHARACTER_JUNK): + """Returns generator yielding marked up from/to side by side differences. + + Arguments: + fromlines -- list of text lines to compared to tolines + tolines -- list of text lines to be compared to fromlines + context -- number of context lines to display on each side of difference, + if None, all from/to text lines will be generated. + linejunk -- passed on to ndiff (see ndiff documentation) + charjunk -- passed on to ndiff (see ndiff documentation) + + This function returns an interator which returns a tuple: + (from line tuple, to line tuple, boolean flag) + + from/to line tuple -- (line num, line text) + line num -- integer or None (to indicate a context seperation) + line text -- original line text with following markers inserted: + '\0+' -- marks start of added text + '\0-' -- marks start of deleted text + '\0^' -- marks start of changed text + '\1' -- marks end of added/deleted/changed text + + boolean flag -- None indicates context separation, True indicates + either "from" or "to" line contains a change, otherwise False. + + This function/iterator was originally developed to generate side by side + file difference for making HTML pages (see HtmlDiff class for example + usage). + + Note, this function utilizes the ndiff function to generate the side by + side difference markup. Optional ndiff arguments may be passed to this + function and they in turn will be passed to ndiff. + """ + import re + + # regular expression for finding intraline change indices + change_re = re.compile('(\++|\-+|\^+)') + + # create the difference iterator to generate the differences + diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk) + + def _make_line(lines, format_key, side, num_lines=[0,0]): + """Returns line of text with user's change markup and line formatting. + + lines -- list of lines from the ndiff generator to produce a line of + text from. When producing the line of text to return, the + lines used are removed from this list. + format_key -- '+' return first line in list with "add" markup around + the entire line. + '-' return first line in list with "delete" markup around + the entire line. + '?' return first line in list with add/delete/change + intraline markup (indices obtained from second line) + None return first line in list with no markup + side -- indice into the num_lines list (0=from,1=to) + num_lines -- from/to current line number. This is NOT intended to be a + passed parameter. It is present as a keyword argument to + maintain memory of the current line numbers between calls + of this function. + + Note, this function is purposefully not defined at the module scope so + that data it needs from its parent function (within whose context it + is defined) does not need to be of module scope. + """ + num_lines[side] += 1 + # Handle case where no user markup is to be added, just return line of + # text with user's line format to allow for usage of the line number. + if format_key is None: + return (num_lines[side],lines.pop(0)[2:]) + # Handle case of intraline changes + if format_key == '?': + text, markers = lines.pop(0), lines.pop(0) + # find intraline changes (store change type and indices in tuples) + sub_info = [] + def record_sub_info(match_object,sub_info=sub_info): + sub_info.append([match_object.group(1)[0],match_object.span()]) + return match_object.group(1) + change_re.sub(record_sub_info,markers) + # process each tuple inserting our special marks that won't be + # noticed by an xml/html escaper. + for key,(begin,end) in sub_info[::-1]: + text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:] + text = text[2:] + # Handle case of add/delete entire line + else: + text = lines.pop(0)[2:] + # if line of text is just a newline, insert a space so there is + # something for the user to highlight and see. + if len(text) <= 1: + text = ' '+text + # insert marks that won't be noticed by an xml/html escaper. + text = '\0' + format_key + text + '\1' + # Return line of text, first allow user's line formatter to do it's + # thing (such as adding the line number) then replace the special + # marks with what the user's change markup. + return (num_lines[side],text) + + def _line_iterator(): + """Yields from/to lines of text with a change indication. + + This function is an iterator. It itself pulls lines from a + differencing iterator, processes them and yields them. When it can + it yields both a "from" and a "to" line, otherwise it will yield one + or the other. In addition to yielding the lines of from/to text, a + boolean flag is yielded to indicate if the text line(s) have + differences in them. + + Note, this function is purposefully not defined at the module scope so + that data it needs from its parent function (within whose context it + is defined) does not need to be of module scope. + """ + lines = [] + num_blanks_pending, num_blanks_to_yield = 0, 0 + while True: + # Load up next 4 lines so we can look ahead, create strings which + # are a concatenation of the first character of each of the 4 lines + # so we can do some very readable comparisons. + while len(lines) < 4: + try: + lines.append(diff_lines_iterator.next()) + except StopIteration: + lines.append('X') + s = ''.join([line[0] for line in lines]) + if s.startswith('X'): + # When no more lines, pump out any remaining blank lines so the + # corresponding add/delete lines get a matching blank line so + # all line pairs get yielded at the next level. + num_blanks_to_yield = num_blanks_pending + elif s.startswith('-?+?'): + # simple intraline change + yield _make_line(lines,'?',0), _make_line(lines,'?',1), True + continue + elif s.startswith('--++'): + # in delete block, add block coming: we do NOT want to get + # caught up on blank lines yet, just process the delete line + num_blanks_pending -= 1 + yield _make_line(lines,'-',0), None, True + continue + elif s.startswith('--?+') or s.startswith('--+') or \ + s.startswith('- '): + # in delete block and see a intraline change or unchanged line + # coming: yield the delete line and then blanks + from_line,to_line = _make_line(lines,'-',0), None + num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0 + elif s.startswith('-+?'): + # intraline change + yield _make_line(lines,None,0), _make_line(lines,'?',1), True + continue + elif s.startswith('-?+'): + # intraline change + yield _make_line(lines,'?',0), _make_line(lines,None,1), True + continue + elif s.startswith('-'): + # delete FROM line + num_blanks_pending -= 1 + yield _make_line(lines,'-',0), None, True + continue + elif s.startswith('+--'): + # in add block, delete block coming: we do NOT want to get + # caught up on blank lines yet, just process the add line + num_blanks_pending += 1 + yield None, _make_line(lines,'+',1), True + continue + elif s.startswith('+ ') or s.startswith('+-'): + # will be leaving an add block: yield blanks then add line + from_line, to_line = None, _make_line(lines,'+',1) + num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0 + elif s.startswith('+'): + # inside an add block, yield the add line + num_blanks_pending += 1 + yield None, _make_line(lines,'+',1), True + continue + elif s.startswith(' '): + # unchanged text, yield it to both sides + yield _make_line(lines[:],None,0),_make_line(lines,None,1),False + continue + # Catch up on the blank lines so when we yield the next from/to + # pair, they are lined up. + while(num_blanks_to_yield < 0): + num_blanks_to_yield += 1 + yield None,('','\n'),True + while(num_blanks_to_yield > 0): + num_blanks_to_yield -= 1 + yield ('','\n'),None,True + if s.startswith('X'): + raise StopIteration + else: + yield from_line,to_line,True + + def _line_pair_iterator(): + """Yields from/to lines of text with a change indication. + + This function is an iterator. It itself pulls lines from the line + iterator. It's difference from that iterator is that this function + always yields a pair of from/to text lines (with the change + indication). If necessary it will collect single from/to lines + until it has a matching pair from/to pair to yield. + + Note, this function is purposefully not defined at the module scope so + that data it needs from its parent function (within whose context it + is defined) does not need to be of module scope. + """ + line_iterator = _line_iterator() + fromlines,tolines=[],[] + while True: + # Collecting lines of text until we have a from/to pair + while (len(fromlines)==0 or len(tolines)==0): + from_line, to_line, found_diff =line_iterator.next() + if from_line is not None: + fromlines.append((from_line,found_diff)) + if to_line is not None: + tolines.append((to_line,found_diff)) + # Once we have a pair, remove them from the collection and yield it + from_line, fromDiff = fromlines.pop(0) + to_line, to_diff = tolines.pop(0) + yield (from_line,to_line,fromDiff or to_diff) + + # Handle case where user does not want context differencing, just yield + # them up without doing anything else with them. + line_pair_iterator = _line_pair_iterator() + if context is None: + while True: + yield line_pair_iterator.next() + # Handle case where user wants context differencing. We must do some + # storage of lines until we know for sure that they are to be yielded. + else: + context += 1 + lines_to_write = 0 + while True: + # Store lines up until we find a difference, note use of a + # circular queue because we only need to keep around what + # we need for context. + index, contextLines = 0, [None]*(context) + found_diff = False + while(found_diff is False): + from_line, to_line, found_diff = line_pair_iterator.next() + i = index % context + contextLines[i] = (from_line, to_line, found_diff) + index += 1 + # Yield lines that we have collected so far, but first yield + # the user's separator. + if index > context: + yield None, None, None + lines_to_write = context + else: + lines_to_write = index + index = 0 + while(lines_to_write): + i = index % context + index += 1 + yield contextLines[i] + lines_to_write -= 1 + # Now yield the context lines after the change + lines_to_write = context-1 + while(lines_to_write): + from_line, to_line, found_diff = line_pair_iterator.next() + # If another change within the context, extend the context + if found_diff: + lines_to_write = context-1 + else: + lines_to_write -= 1 + yield from_line, to_line, found_diff + + +_file_template = """ + + + + + + + + + + + + %(table)s%(legend)s + + +""" + +_styles = """ + table.diff {font-family:Courier; border:medium;} + .diff_header {background-color:#e0e0e0} + td.diff_header {text-align:right} + .diff_next {background-color:#c0c0c0} + .diff_add {background-color:#aaffaa} + .diff_chg {background-color:#ffff77} + .diff_sub {background-color:#ffaaaa}""" + +_table_template = """ + + + + %(header_row)s + +%(data_rows)s +
""" + +_legend = """ + + + + +
Legends
+ + + + +
Colors
 Added 
Changed
Deleted
+ + + + +
Links
(f)irst change
(n)ext change
(t)op
""" + +class HtmlDiff(object): + """For producing HTML side by side comparison with change highlights. + + This class can be used to create an HTML table (or a complete HTML file + containing the table) showing a side by side, line by line comparision + of text with inter-line and intra-line change highlights. The table can + be generated in either full or contextual difference mode. + + The following methods are provided for HTML generation: + + make_table -- generates HTML for a single side by side table + make_file -- generates complete HTML file with a single side by side table + + See tools/scripts/diff.py for an example usage of this class. + """ + + _file_template = _file_template + _styles = _styles + _table_template = _table_template + _legend = _legend + _default_prefix = 0 + + def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None, + charjunk=IS_CHARACTER_JUNK): + """HtmlDiff instance initializer + + Arguments: + tabsize -- tab stop spacing, defaults to 8. + wrapcolumn -- column number where lines are broken and wrapped, + defaults to None where lines are not wrapped. + linejunk,charjunk -- keyword arguments passed into ndiff() (used to by + HtmlDiff() to generate the side by side HTML differences). See + ndiff() documentation for argument default values and descriptions. + """ + self._tabsize = tabsize + self._wrapcolumn = wrapcolumn + self._linejunk = linejunk + self._charjunk = charjunk + + def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False, + numlines=5): + """Returns HTML file of side by side comparison with change highlights + + Arguments: + fromlines -- list of "from" lines + tolines -- list of "to" lines + fromdesc -- "from" file column header string + todesc -- "to" file column header string + context -- set to True for contextual differences (defaults to False + which shows full differences). + numlines -- number of context lines. When context is set True, + controls number of lines displayed before and after the change. + When context is False, controls the number of lines to place + the "next" link anchors before the next change (so click of + "next" link jumps to just before the change). + """ + + return self._file_template % dict( + styles = self._styles, + legend = self._legend, + table = self.make_table(fromlines,tolines,fromdesc,todesc, + context=context,numlines=numlines)) + + def _tab_newline_replace(self,fromlines,tolines): + """Returns from/to line lists with tabs expanded and newlines removed. + + Instead of tab characters being replaced by the number of spaces + needed to fill in to the next tab stop, this function will fill + the space with tab characters. This is done so that the difference + algorithms can identify changes in a file when tabs are replaced by + spaces and vice versa. At the end of the HTML generation, the tab + characters will be replaced with a nonbreakable space. + """ + def expand_tabs(line): + # hide real spaces + line = line.replace(' ','\0') + # expand tabs into spaces + line = line.expandtabs(self._tabsize) + # relace spaces from expanded tabs back into tab characters + # (we'll replace them with markup after we do differencing) + line = line.replace(' ','\t') + return line.replace('\0',' ').rstrip('\n') + fromlines = [expand_tabs(line) for line in fromlines] + tolines = [expand_tabs(line) for line in tolines] + return fromlines,tolines + + def _split_line(self,data_list,line_num,text): + """Builds list of text lines by splitting text lines at wrap point + + This function will determine if the input text line needs to be + wrapped (split) into separate lines. If so, the first wrap point + will be determined and the first line appended to the output + text line list. This function is used recursively to handle + the second part of the split line to further split it. + """ + # if blank line or context separator, just add it to the output list + if not line_num: + data_list.append((line_num,text)) + return + + # if line text doesn't need wrapping, just add it to the output list + size = len(text) + max = self._wrapcolumn + if (size <= max) or ((size -(text.count('\0')*3)) <= max): + data_list.append((line_num,text)) + return + + # scan text looking for the wrap point, keeping track if the wrap + # point is inside markers + i = 0 + n = 0 + mark = '' + while n < max and i < size: + if text[i] == '\0': + i += 1 + mark = text[i] + i += 1 + elif text[i] == '\1': + i += 1 + mark = '' + else: + i += 1 + n += 1 + + # wrap point is inside text, break it up into separate lines + line1 = text[:i] + line2 = text[i:] + + # if wrap point is inside markers, place end marker at end of first + # line and start marker at beginning of second line because each + # line will have its own table tag markup around it. + if mark: + line1 = line1 + '\1' + line2 = '\0' + mark + line2 + + # tack on first line onto the output list + data_list.append((line_num,line1)) + + # use this routine again to wrap the remaining text + self._split_line(data_list,'>',line2) + + def _line_wrapper(self,diffs): + """Returns iterator that splits (wraps) mdiff text lines""" + + # pull from/to data and flags from mdiff iterator + for fromdata,todata,flag in diffs: + # check for context separators and pass them through + if flag is None: + yield fromdata,todata,flag + continue + (fromline,fromtext),(toline,totext) = fromdata,todata + # for each from/to line split it at the wrap column to form + # list of text lines. + fromlist,tolist = [],[] + self._split_line(fromlist,fromline,fromtext) + self._split_line(tolist,toline,totext) + # yield from/to line in pairs inserting blank lines as + # necessary when one side has more wrapped lines + while fromlist or tolist: + if fromlist: + fromdata = fromlist.pop(0) + else: + fromdata = ('',' ') + if tolist: + todata = tolist.pop(0) + else: + todata = ('',' ') + yield fromdata,todata,flag + + def _collect_lines(self,diffs): + """Collects mdiff output into separate lists + + Before storing the mdiff from/to data into a list, it is converted + into a single line of text with HTML markup. + """ + + fromlist,tolist,flaglist = [],[],[] + # pull from/to data and flags from mdiff style iterator + for fromdata,todata,flag in diffs: + try: + # store HTML markup of the lines into the lists + fromlist.append(self._format_line(0,flag,*fromdata)) + tolist.append(self._format_line(1,flag,*todata)) + except TypeError: + # exceptions occur for lines where context separators go + fromlist.append(None) + tolist.append(None) + flaglist.append(flag) + return fromlist,tolist,flaglist + + def _format_line(self,side,flag,linenum,text): + """Returns HTML markup of "from" / "to" text lines + + side -- 0 or 1 indicating "from" or "to" text + flag -- indicates if difference on line + linenum -- line number (used for line number column) + text -- line text to be marked up + """ + try: + linenum = '%d' % linenum + id = ' id="%s%s"' % (self._prefix[side],linenum) + except TypeError: + # handle blank lines where linenum is '>' or '' + id = '' + # replace those things that would get confused with HTML symbols + text=text.replace("&","&").replace(">",">").replace("<","<") + + # make space non-breakable so they don't get compressed or line wrapped + text = text.replace(' ',' ').rstrip() + + return '%s%s' \ + % (id,linenum,text) + + def _make_prefix(self): + """Create unique anchor prefixes""" + + # Generate a unique anchor prefix so multiple tables + # can exist on the same HTML page without conflicts. + fromprefix = "from%d_" % HtmlDiff._default_prefix + toprefix = "to%d_" % HtmlDiff._default_prefix + HtmlDiff._default_prefix += 1 + # store prefixes so line format method has access + self._prefix = [fromprefix,toprefix] + + def _convert_flags(self,fromlist,tolist,flaglist,context,numlines): + """Makes list of "next" links""" + + # all anchor names will be generated using the unique "to" prefix + toprefix = self._prefix[1] + + # process change flags, generating middle column of next anchors/links + next_id = ['']*len(flaglist) + next_href = ['']*len(flaglist) + num_chg, in_change = 0, False + last = 0 + for i,flag in enumerate(flaglist): + if flag: + if not in_change: + in_change = True + last = i + # at the beginning of a change, drop an anchor a few lines + # (the context lines) before the change for the previous + # link + i = max([0,i-numlines]) + next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg) + # at the beginning of a change, drop a link to the next + # change + num_chg += 1 + next_href[last] = 'n' % ( + toprefix,num_chg) + else: + in_change = False + # check for cases where there is no content to avoid exceptions + if not flaglist: + flaglist = [False] + next_id = [''] + next_href = [''] + last = 0 + if context: + fromlist = [' No Differences Found '] + tolist = fromlist + else: + fromlist = tolist = [' Empty File '] + # if not a change on first line, drop a link + if not flaglist[0]: + next_href[0] = 'f' % toprefix + # redo the last link to link to the top + next_href[last] = 't' % (toprefix) + + return fromlist,tolist,flaglist,next_href,next_id + + def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False, + numlines=5): + """Returns HTML table of side by side comparison with change highlights + + Arguments: + fromlines -- list of "from" lines + tolines -- list of "to" lines + fromdesc -- "from" file column header string + todesc -- "to" file column header string + context -- set to True for contextual differences (defaults to False + which shows full differences). + numlines -- number of context lines. When context is set True, + controls number of lines displayed before and after the change. + When context is False, controls the number of lines to place + the "next" link anchors before the next change (so click of + "next" link jumps to just before the change). + """ + + # make unique anchor prefixes so that multiple tables may exist + # on the same page without conflict. + self._make_prefix() + + # change tabs to spaces before it gets more difficult after we insert + # markkup + fromlines,tolines = self._tab_newline_replace(fromlines,tolines) + + # create diffs iterator which generates side by side from/to data + if context: + context_lines = numlines + else: + context_lines = None + diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk, + charjunk=self._charjunk) + + # set up iterator to wrap lines that exceed desired width + if self._wrapcolumn: + diffs = self._line_wrapper(diffs) + + # collect up from/to lines and flags into lists (also format the lines) + fromlist,tolist,flaglist = self._collect_lines(diffs) + + # process change flags, generating middle column of next anchors/links + fromlist,tolist,flaglist,next_href,next_id = self._convert_flags( + fromlist,tolist,flaglist,context,numlines) + + import cStringIO + s = cStringIO.StringIO() + fmt = ' %s%s' + \ + '%s%s\n' + for i in range(len(flaglist)): + if flaglist[i] is None: + # mdiff yields None on separator lines skip the bogus ones + # generated for the first line + if i > 0: + s.write(' \n \n') + else: + s.write( fmt % (next_id[i],next_href[i],fromlist[i], + next_href[i],tolist[i])) + if fromdesc or todesc: + header_row = '%s%s%s%s' % ( + '
', + '%s' % fromdesc, + '
', + '%s' % todesc) + else: + header_row = '' + + table = self._table_template % dict( + data_rows=s.getvalue(), + header_row=header_row, + prefix=self._prefix[1]) + + return table.replace('\0+',''). \ + replace('\0-',''). \ + replace('\0^',''). \ + replace('\1',''). \ + replace('\t',' ') + +del re + def restore(delta, which): r""" Generate one of the two sequences that generated a delta. diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 9819c84..37fc548 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -1,5 +1,5 @@ import difflib -from test import test_support +from test.test_support import run_unittest, findfile import unittest import doctest @@ -19,6 +19,130 @@ class TestSFbugs(unittest.TestCase): diff_gen = difflib.unified_diff([], []) self.assertRaises(StopIteration, diff_gen.next) +patch914575_from1 = """ + 1. Beautiful is beTTer than ugly. + 2. Explicit is better than implicit. + 3. Simple is better than complex. + 4. Complex is better than complicated. +""" + +patch914575_to1 = """ + 1. Beautiful is better than ugly. + 3. Simple is better than complex. + 4. Complicated is better than complex. + 5. Flat is better than nested. +""" + +patch914575_from2 = """ +\t\tLine 1: preceeded by from:[tt] to:[ssss] + \t\tLine 2: preceeded by from:[sstt] to:[sssst] + \t \tLine 3: preceeded by from:[sstst] to:[ssssss] +Line 4: \thas from:[sst] to:[sss] after : +Line 5: has from:[t] to:[ss] at end\t +""" + +patch914575_to2 = """ + Line 1: preceeded by from:[tt] to:[ssss] + \tLine 2: preceeded by from:[sstt] to:[sssst] + Line 3: preceeded by from:[sstst] to:[ssssss] +Line 4: has from:[sst] to:[sss] after : +Line 5: has from:[t] to:[ss] at end +""" + +patch914575_from3 = """line 0 +1234567890123456789012345689012345 +line 1 +line 2 +line 3 +line 4 changed +line 5 changed +line 6 changed +line 7 +line 8 subtracted +line 9 +1234567890123456789012345689012345 +short line +just fits in!! +just fits in two lines yup!! +the end""" + +patch914575_to3 = """line 0 +1234567890123456789012345689012345 +line 1 +line 2 added +line 3 +line 4 chanGEd +line 5a chanGed +line 6a changEd +line 7 +line 8 +line 9 +1234567890 +another long line that needs to be wrapped +just fitS in!! +just fits in two lineS yup!! +the end""" + +class TestSFpatches(unittest.TestCase): + + def test_html_diff(self): + # Check SF patch 914575 for generating HTML differences + f1a = ((patch914575_from1 + '123\n'*10)*3) + t1a = (patch914575_to1 + '123\n'*10)*3 + f1b = '456\n'*10 + f1a + t1b = '456\n'*10 + t1a + f1a = f1a.splitlines() + t1a = t1a.splitlines() + f1b = f1b.splitlines() + t1b = t1b.splitlines() + f2 = patch914575_from2.splitlines() + t2 = patch914575_to2.splitlines() + f3 = patch914575_from3 + t3 = patch914575_to3 + i = difflib.HtmlDiff() + j = difflib.HtmlDiff(tabsize=2) + k = difflib.HtmlDiff(wrapcolumn=14) + + full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5) + tables = '\n'.join( + [ + '

Context (first diff within numlines=5(default))

', + i.make_table(f1a,t1a,'from','to',context=True), + '

Context (first diff after numlines=5(default))

', + i.make_table(f1b,t1b,'from','to',context=True), + '

Context (numlines=6)

', + i.make_table(f1a,t1a,'from','to',context=True,numlines=6), + '

Context (numlines=0)

', + i.make_table(f1a,t1a,'from','to',context=True,numlines=0), + '

Same Context

', + i.make_table(f1a,f1a,'from','to',context=True), + '

Same Full

', + i.make_table(f1a,f1a,'from','to',context=False), + '

Empty Context

', + i.make_table([],[],'from','to',context=True), + '

Empty Full

', + i.make_table([],[],'from','to',context=False), + '

tabsize=2

', + j.make_table(f2,t2), + '

tabsize=default

', + i.make_table(f2,t2), + '

Context (wrapcolumn=14,numlines=0)

', + k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0), + '

wrapcolumn=14,splitlines()

', + k.make_table(f3.splitlines(),t3.splitlines()), + '

wrapcolumn=14,splitlines(True)

', + k.make_table(f3.splitlines(True),t3.splitlines(True)), + ]) + actual = full.replace('','\n%s\n' % tables) + # temporarily uncomment next three lines to baseline this test + #f = open('test_difflib_expect.html','w') + #f.write(actual) + #f.close() + expect = open(findfile('test_difflib_expect.html')).read() + + + self.assertEqual(actual,expect) + Doctests = doctest.DocTestSuite(difflib) -test_support.run_unittest(TestSFbugs, Doctests) +run_unittest(TestSFpatches, TestSFbugs, Doctests) diff --git a/Lib/test/test_difflib_expect.html b/Lib/test/test_difflib_expect.html new file mode 100644 index 0000000..497e37d --- /dev/null +++ b/Lib/test/test_difflib_expect.html @@ -0,0 +1,526 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
f1f1
n2   1. Beautiful is beTTer than ugly.n2   1. Beautiful is better than ugly.
3   2. Explicit is better than implicit.
4   3. Simple is better than complex.3   3.   Simple is better than complex.
5   4. Complex is better than complicated.4   4. Complicated is better than complex.
5   5. Flat is better than nested.
61236123
71237123
81238123
91239123
1012310123
1112311123
1212312123
1312313123
1412314123
1512315123
1616
n17   1. Beautiful is beTTer than ugly.n17   1. Beautiful is better than ugly.
18   2. Explicit is better than implicit.
19   3. Simple is better than complex.18   3.   Simple is better than complex.
20   4. Complex is better than complicated.19   4. Complicated is better than complex.
20   5. Flat is better than nested.
2112321123
2212322123
2312323123
2412324123
2512325123
2612326123
2712327123
2812328123
2912329123
3012330123
3131
t32   1. Beautiful is beTTer than ugly.t32   1. Beautiful is better than ugly.
33   2. Explicit is better than implicit.
34   3. Simple is better than complex.33   3.   Simple is better than complex.
35   4. Complex is better than complicated.34   4. Complicated is better than complex.
35   5. Flat is better than nested.
3612336123
3712337123
3812338123
3912339123
4012340123
4112341123
4212342123
4312343123
4412344123
4512345123
+ + + + +
Legends
+ + + + +
Colors
 Added 
Changed
Deleted
+ + + + +
Links
(f)irst change
(n)ext change
(t)op
+ +

Context (first diff within numlines=5(default))

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
f1f1
n2   1. Beautiful is beTTer than ugly.n2   1. Beautiful is better than ugly.
3   2. Explicit is better than implicit.
4   3. Simple is better than complex.3   3.   Simple is better than complex.
5   4. Complex is better than complicated.4   4. Complicated is better than complex.
5   5. Flat is better than nested.
61236123
71237123
81238123
91239123
1012310123
1212312123
1312313123
1412314123
1512315123
1616
n17   1. Beautiful is beTTer than ugly.n17   1. Beautiful is better than ugly.
18   2. Explicit is better than implicit.
19   3. Simple is better than complex.18   3.   Simple is better than complex.
20   4. Complex is better than complicated.19   4. Complicated is better than complex.
20   5. Flat is better than nested.
2112321123
2212322123
2312323123
2412324123
2512325123
2712327123
2812328123
2912329123
3012330123
3131
t32   1. Beautiful is beTTer than ugly.t32   1. Beautiful is better than ugly.
33   2. Explicit is better than implicit.
34   3. Simple is better than complex.33   3.   Simple is better than complex.
35   4. Complex is better than complicated.34   4. Complicated is better than complex.
35   5. Flat is better than nested.
3612336123
3712337123
3812338123
3912339123
4012340123
+

Context (first diff after numlines=5(default))

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
74567456
84568456
94569456
1045610456
1111
n12   1. Beautiful is beTTer than ugly.n12   1. Beautiful is better than ugly.
13   2. Explicit is better than implicit.
14   3. Simple is better than complex.13   3.   Simple is better than complex.
15   4. Complex is better than complicated.14   4. Complicated is better than complex.
15   5. Flat is better than nested.
1612316123
1712317123
1812318123
1912319123
2012320123
2212322123
2312323123
2412324123
2512325123
2626
n27   1. Beautiful is beTTer than ugly.n27   1. Beautiful is better than ugly.
28   2. Explicit is better than implicit.
29   3. Simple is better than complex.28   3.   Simple is better than complex.
30   4. Complex is better than complicated.29   4. Complicated is better than complex.
30   5. Flat is better than nested.
3112331123
3212332123
3312333123
3412334123
3512335123
3712337123
3812338123
3912339123
4012340123
4141
t42   1. Beautiful is beTTer than ugly.t42   1. Beautiful is better than ugly.
43   2. Explicit is better than implicit.
44   3. Simple is better than complex.43   3.   Simple is better than complex.
45   4. Complex is better than complicated.44   4. Complicated is better than complex.
45   5. Flat is better than nested.
4612346123
4712347123
4812348123
4912349123
5012350123
+

Context (numlines=6)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
f1f1
n2   1. Beautiful is beTTer than ugly.n2   1. Beautiful is better than ugly.
3   2. Explicit is better than implicit.
4   3. Simple is better than complex.3   3.   Simple is better than complex.
5   4. Complex is better than complicated.4   4. Complicated is better than complex.
5   5. Flat is better than nested.
61236123
71237123
81238123
91239123
1012310123
1112311123
1212312123
1312313123
1412314123
1512315123
1616
n17   1. Beautiful is beTTer than ugly.n17   1. Beautiful is better than ugly.
18   2. Explicit is better than implicit.
19   3. Simple is better than complex.18   3.   Simple is better than complex.
20   4. Complex is better than complicated.19   4. Complicated is better than complex.
20   5. Flat is better than nested.
2112321123
2212322123
2312323123
2412324123
2512325123
2612326123
2712327123
2812328123
2912329123
3012330123
3131
t32   1. Beautiful is beTTer than ugly.t32   1. Beautiful is better than ugly.
33   2. Explicit is better than implicit.
34   3. Simple is better than complex.33   3.   Simple is better than complex.
35   4. Complex is better than complicated.34   4. Complicated is better than complex.
35   5. Flat is better than nested.
3612336123
3712337123
3812338123
3912339123
4012340123
4112341123
+

Context (numlines=0)

+ + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
n2   1. Beautiful is beTTer than ugly.n2   1. Beautiful is better than ugly.
3   2. Explicit is better than implicit.
4   3. Simple is better than complex.3   3.   Simple is better than complex.
5   4. Complex is better than complicated.4   4. Complicated is better than complex.
5   5. Flat is better than nested.
n17   1. Beautiful is beTTer than ugly.n17   1. Beautiful is better than ugly.
18   2. Explicit is better than implicit.
19   3. Simple is better than complex.18   3.   Simple is better than complex.
20   4. Complex is better than complicated.19   4. Complicated is better than complex.
20   5. Flat is better than nested.
t32   1. Beautiful is beTTer than ugly.t32   1. Beautiful is better than ugly.
33   2. Explicit is better than implicit.
34   3. Simple is better than complex.33   3.   Simple is better than complex.
35   4. Complex is better than complicated.34   4. Complicated is better than complex.
35   5. Flat is better than nested.
+

Same Context

+ + + + + + + + +

from
to
t No Differences Found t No Differences Found 
+

Same Full

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from
to
t1t1
2   1. Beautiful is beTTer than ugly.2   1. Beautiful is beTTer than ugly.
3   2. Explicit is better than implicit.3   2. Explicit is better than implicit.
4   3. Simple is better than complex.4   3. Simple is better than complex.
5   4. Complex is better than complicated.5   4. Complex is better than complicated.
61236123
71237123
81238123
91239123
1012310123
1112311123
1212312123
1312313123
1412314123
1512315123
1616
17   1. Beautiful is beTTer than ugly.17   1. Beautiful is beTTer than ugly.
18   2. Explicit is better than implicit.18   2. Explicit is better than implicit.
19   3. Simple is better than complex.19   3. Simple is better than complex.
20   4. Complex is better than complicated.20   4. Complex is better than complicated.
2112321123
2212322123
2312323123
2412324123
2512325123
2612326123
2712327123
2812328123
2912329123
3012330123
3131
32   1. Beautiful is beTTer than ugly.32   1. Beautiful is beTTer than ugly.
33   2. Explicit is better than implicit.33   2. Explicit is better than implicit.
34   3. Simple is better than complex.34   3. Simple is better than complex.
35   4. Complex is better than complicated.35   4. Complex is better than complicated.
3612336123
3712337123
3812338123
3912339123
4012340123
4112341123
4212342123
4312343123
4412344123
4512345123
+

Empty Context

+ + + + + + + + +

from
to
t No Differences Found t No Differences Found 
+

Empty Full

+ + + + + + + + +

from
to
t Empty File t Empty File 
+

tabsize=2

+ + + + + + + + + + + + + +
f1f1
t2    Line 1: preceeded by from:[tt] to:[ssss]t2    Line 1: preceeded by from:[tt] to:[ssss]
3      Line 2: preceeded by from:[sstt] to:[sssst]3      Line 2: preceeded by from:[sstt] to:[sssst]
4      Line 3: preceeded by from:[sstst] to:[ssssss]4      Line 3: preceeded by from:[sstst] to:[ssssss]
5Line 4:   has from:[sst] to:[sss] after :5Line 4:   has from:[sst] to:[sss] after :
6Line 5: has from:[t] to:[ss] at end 6Line 5: has from:[t] to:[ss] at end  
+

tabsize=default

+ + + + + + + + + + + + + +
f1f1
t2                Line 1: preceeded by from:[tt] to:[ssss]t2    Line 1: preceeded by from:[tt] to:[ssss]
3                Line 2: preceeded by from:[sstt] to:[sssst]3        Line 2: preceeded by from:[sstt] to:[sssst]
4                Line 3: preceeded by from:[sstst] to:[ssssss]4      Line 3: preceeded by from:[sstst] to:[ssssss]
5Line 4:         has from:[sst] to:[sss] after :5Line 4:   has from:[sst] to:[sss] after :
6Line 5: has from:[t] to:[ss] at end     6Line 5: has from:[t] to:[ss] at end  
+

Context (wrapcolumn=14,numlines=0)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
n4line 2n4line 2    adde
 >d
n6line 4   changn6line 4   chanG
>e>E
7line 5   chang7line 5a  chanG
>ed >ed 
8line 6   chang8line 6a  chang
>e>E
n10line 8  subtran10line 8
>cted 
t1212345678901234t121234567890
>56789012345689 
>012345 
13short line13another long l
 >ine that needs
 > to be wrapped
14just fits in!!14just fitS in!!
15just fits in t15just fits in t
>wo lines yup!!>wo lineS yup!!
+

wrapcolumn=14,splitlines()

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
f1line 0f1line 0
212345678901234212345678901234
>56789012345689>56789012345689
>012345>012345
3line 13line 1
n4line 2n4line 2    adde
 >d
5line 35line 3
n6line 4   changn6line 4   chanG
>e>E
7line 5   chang7line 5a  chanG
>ed >ed 
8line 6   chang8line 6a  chang
>e>E
9line 79line 7
n10line 8  subtran10line 8
>cted 
11line 911line 9
t1212345678901234t121234567890
>56789012345689 
>012345 
13short line13another long l
 >ine that needs
 > to be wrapped
14just fits in!!14just fitS in!!
15just fits in t15just fits in t
>wo lines yup!!>wo lineS yup!!
16the end16the end
+

wrapcolumn=14,splitlines(True)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
f1line 0f1line 0
212345678901234212345678901234
>56789012345689>56789012345689
>012345>012345
3line 13line 1
n4line 2n4line 2    adde
 >d
5line 35line 3
n6line 4   changn6line 4   chanG
>e>E
7line 5   chang7line 5a  chanG
>ed >ed 
8line 6   chang8line 6a  chang
>e>E
9line 79line 7
n10line 8  subtran10line 8
>cted 
11line 911line 9
t1212345678901234t121234567890
>56789012345689 
>012345 
13short line13another long l
 >ine that needs
 > to be wrapped
14just fits in!!14just fitS in!!
15just fits in t15just fits in t
>wo lines yup!!>wo lineS yup!!
16the end16the end
+ + + \ No newline at end of file diff --git a/Misc/ACKS b/Misc/ACKS index c296d56..6eb0f64 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -201,6 +201,7 @@ Raymund Galvin Nitin Ganatra Fred Gansevles Lars Marius Garshol +Dan Gass Andrew Gaul Stephen M. Gava Harry Henry Gebel diff --git a/Misc/NEWS b/Misc/NEWS index a084d22..4656fa2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,8 @@ Core and builtins Extension modules ----------------- +- difflib now supports HTML side-by-side diff. + - os.urandom has been added for systems that support sources of random data. @@ -63,6 +65,8 @@ Extension modules Library ------- +- difflib and diff.py can now generate HTML. + - bdist_rpm now includes version and release in the BuildRoot, and replaces - by ``_`` in version and release. diff --git a/Tools/scripts/diff.py b/Tools/scripts/diff.py index ecbff61..05bfc25 100644 --- a/Tools/scripts/diff.py +++ b/Tools/scripts/diff.py @@ -1,8 +1,9 @@ -""" Command line interface to difflib.py providing diffs in three formats: +""" Command line interface to difflib.py providing diffs in four formats: * ndiff: lists every line and highlights interline changes. -* context: highlights clusters of changes in a before/after format +* context: highlights clusters of changes in a before/after format. * unified: highlights clusters of changes in an inline format. +* html: generates side by side comparison with change highlights. """ @@ -14,6 +15,7 @@ def main(): parser = optparse.OptionParser(usage) parser.add_option("-c", action="store_true", default=False, help='Produce a context format diff (default)') parser.add_option("-u", action="store_true", default=False, help='Produce a unified format diff') + parser.add_option("-m", action="store_true", default=False, help='Produce HTML side by side diff (can use -c and -l in conjunction)') parser.add_option("-n", action="store_true", default=False, help='Produce a ndiff format diff') parser.add_option("-l", "--lines", type="int", default=3, help='Set number of context lines (default 3)') (options, args) = parser.parse_args() @@ -36,6 +38,8 @@ def main(): diff = difflib.unified_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n) elif options.n: diff = difflib.ndiff(fromlines, tolines) + elif options.m: + diff = difflib.HtmlDiff().make_file(fromlines,tolines,fromfile,tofile,context=options.c,numlines=n) else: diff = difflib.context_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n) -- cgit v0.12