summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tim Peters <tim.peters@gmail.com> 2001-09-22 21:30:22 (GMT)
committer Tim Peters <tim.peters@gmail.com> 2001-09-22 21:30:22 (GMT)
commit 8a9c284437652826a3da83ec38c4536fa111eb40 (patch)
tree cd1e3df2a8b2621c2c10b7b3f788c2c4c0ac364f
parent 380bad1b4e363f3b7c23677981e19e1fa3aded1b (diff)
download cpython-8a9c284437652826a3da83ec38c4536fa111eb40.zip
cpython-8a9c284437652826a3da83ec38c4536fa111eb40.tar.gz
cpython-8a9c284437652826a3da83ec38c4536fa111eb40.tar.bz2
Make difflib.ndiff() and difflib.Differ.compare() generators. This
restores the 2.1 ability of Tools/scripts/ndiff.py to start producing output before the entire comparison is complete.
-rw-r--r-- Doc/lib/libdifflib.tex 26
-rw-r--r-- Lib/difflib.py 100
-rw-r--r-- Misc/NEWS 23
-rwxr-xr-x Tools/scripts/ndiff.py 5
4 files changed, 84 insertions, 70 deletions
diff --git a/Doc/lib/libdifflib.tex b/Doc/lib/libdifflib.tex
index cc9a776..a669435 100644
--- a/Doc/lib/libdifflib.tex
+++ b/Doc/lib/libdifflib.tex
@@ -32,7 +32,7 @@
\begin{classdesc*}{Differ}
This is a class for comparing sequences of lines of text, and
- producing human-readable differences or deltas. Differ uses
+ producing human-readable differences or deltas. Differ uses
\class{SequenceMatcher} both to compare sequences of lines, and to
compare sequences of characters within similar (near-matching)
lines.
@@ -85,7 +85,7 @@
\begin{funcdesc}{ndiff}{a, b\optional{, linejunk\optional{,
charjunk}}}
Compare \var{a} and \var{b} (lists of strings); return a
- \class{Differ}-style delta.
+ \class{Differ}-style delta (a generator generating the delta lines).
Optional keyword parameters \var{linejunk} and \var{charjunk} are
for filter functions (or \code{None}):
@@ -109,12 +109,12 @@
... 'ore\ntree\nemu\n'.splitlines(1)))
>>> print ''.join(diff),
- one
-? ^
+? ^
+ ore
-? ^
+? ^
- two
- three
-? -
+? -
+ tree
+ emu
\end{verbatim}
@@ -132,6 +132,7 @@
\begin{verbatim}
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
... 'ore\ntree\nemu\n'.splitlines(1))
+>>> diff = list(diff) # materialize the generated delta into a list
>>> print ''.join(restore(diff, 1)),
one
two
@@ -226,7 +227,7 @@ of the other sequences.
If \var{isjunk} was omitted or \code{None},
\method{get_longest_match()} returns \code{(\var{i}, \var{j},
\var{k})} such that \code{\var{a}[\var{i}:\var{i}+\var{k}]} is equal
- to \code{\var{b}[\var{j}:\var{j}+\var{k}]}, where
+ to \code{\var{b}[\var{j}:\var{j}+\var{k}]}, where
\code{\var{alo} <= \var{i} <= \var{i}+\var{k} <= \var{ahi}} and
\code{\var{blo} <= \var{j} <= \var{j}+\var{k} <= \var{bhi}}.
For all \code{(\var{i'}, \var{j'}, \var{k'})} meeting those
@@ -303,7 +304,7 @@ of the other sequences.
deleted. Note that \code{\var{j1} == \var{j2}} in
this case.}
\lineii{'insert'}{\code{\var{b}[\var{j1}:\var{j2}]} should be
- inserted at \code{\var{a}[\var{i1}:\var{i1}]}.
+ inserted at \code{\var{a}[\var{i1}:\var{i1}]}.
Note that \code{\var{i1} == \var{i2}} in this
case.}
\lineii{'equal'}{\code{\var{a}[\var{i1}:\var{i2}] ==
@@ -459,13 +460,14 @@ The \class{Differ} class has this constructor:
method:
\begin{methoddesc}{compare}{a, b}
- Compare two sequences of lines; return the resulting delta (list).
+ Compare two sequences of lines, and generate the delta (a sequence
+ of lines).
Each sequence must contain individual single-line strings ending
with newlines. Such sequences can be obtained from the
- \method{readlines()} method of file-like objects. The list returned
- is also made up of newline-terminated strings, and ready to be used
- with the \method{writelines()} method of a file-like object.
+ \method{readlines()} method of file-like objects. The delta generated
+ also consists of newline-terminated strings, ready to be printed as-is
+ via the \method{writelines()} method of a file-like object.
\end{methoddesc}
@@ -506,7 +508,7 @@ functions to filter out line and character ``junk.'' See the
Finally, we compare the two:
\begin{verbatim}
->>> result = d.compare(text1, text2)
+>>> result = list(d.compare(text1, text2))
\end{verbatim}
\code{result} is a list of strings, so let's pretty-print it:
diff --git a/Lib/difflib.py b/Lib/difflib.py
index a41d4d5..8493503 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -1,5 +1,7 @@
#! /usr/bin/env python
+from __future__ import generators
+
"""
Module difflib -- helpers for computing deltas between objects.
@@ -22,8 +24,6 @@ Class Differ:
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
'Differ']
-TRACE = 0
-
class SequenceMatcher:
"""
@@ -406,9 +406,6 @@ class SequenceMatcher:
a[besti+bestsize] == b[bestj+bestsize]:
bestsize = bestsize + 1
- if TRACE:
- print "get_matching_blocks", alo, ahi, blo, bhi
- print " returns", besti, bestj, bestsize
return besti, bestj, bestsize
def get_matching_blocks(self):
@@ -432,8 +429,6 @@ class SequenceMatcher:
la, lb = len(self.a), len(self.b)
self.__helper(0, la, 0, lb, self.matching_blocks)
self.matching_blocks.append( (la, lb, 0) )
- if TRACE:
- print '*** matching blocks', self.matching_blocks
return self.matching_blocks
# builds list of matching blocks covering a[alo:ahi] and
@@ -694,7 +689,7 @@ class Differ:
Finally, we compare the two:
- >>> result = d.compare(text1, text2)
+ >>> result = list(d.compare(text1, text2))
'result' is a list of strings, so let's pretty-print it:
@@ -731,7 +726,7 @@ class Differ:
Construct a text differencer, with optional filters.
compare(a, b)
- Compare two sequences of lines; return the resulting delta (list).
+ Compare two sequences of lines; generate the resulting delta.
"""
def __init__(self, linejunk=None, charjunk=None):
@@ -753,16 +748,15 @@ class Differ:
self.linejunk = linejunk
self.charjunk = charjunk
- self.results = []
def compare(self, a, b):
r"""
- Compare two sequences of lines; return the resulting delta (list).
+ Compare two sequences of lines; generate the resulting delta.
Each sequence must contain individual single-line strings ending with
newlines. Such sequences can be obtained from the `readlines()` method
- of file-like objects. The list returned is also made up of
- newline-terminated strings, ready to be used with the `writelines()`
+ of file-like objects. The delta generated also consists of newline-
+ terminated strings, ready to be printed as-is via the `writelines()`
method of a file-like object.
Example:
@@ -783,34 +777,38 @@ class Differ:
cruncher = SequenceMatcher(self.linejunk, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'replace':
- self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
elif tag == 'delete':
- self._dump('-', a, alo, ahi)
+ g = self._dump('-', a, alo, ahi)
elif tag == 'insert':
- self._dump('+', b, blo, bhi)
+ g = self._dump('+', b, blo, bhi)
elif tag == 'equal':
- self._dump(' ', a, alo, ahi)
+ g = self._dump(' ', a, alo, ahi)
else:
raise ValueError, 'unknown tag ' + `tag`
- results = self.results
- self.results = []
- return results
+
+ for line in g:
+ yield line
def _dump(self, tag, x, lo, hi):
- """Store comparison results for a same-tagged range."""
+ """Generate comparison results for a same-tagged range."""
for i in xrange(lo, hi):
- self.results.append('%s %s' % (tag, x[i]))
+ yield '%s %s' % (tag, x[i])
def _plain_replace(self, a, alo, ahi, b, blo, bhi):
assert alo < ahi and blo < bhi
# dump the shorter block first -- reduces the burden on short-term
# memory if the blocks are of very different sizes
if bhi - blo < ahi - alo:
- self._dump('+', b, blo, bhi)
- self._dump('-', a, alo, ahi)
+ first = self._dump('+', b, blo, bhi)
+ second = self._dump('-', a, alo, ahi)
else:
- self._dump('-', a, alo, ahi)
- self._dump('+', b, blo, bhi)
+ first = self._dump('-', a, alo, ahi)
+ second = self._dump('+', b, blo, bhi)
+
+ for g in first, second:
+ for line in g:
+ yield line
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
@@ -830,12 +828,6 @@ class Differ:
? ^ ^ ^
"""
- if TRACE:
- self.results.append('*** _fancy_replace %s %s %s %s\n'
- % (alo, ahi, blo, bhi))
- self._dump('>', a, alo, ahi)
- self._dump('<', b, blo, bhi)
-
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
@@ -869,7 +861,8 @@ class Differ:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
- self._plain_replace(a, alo, ahi, b, blo, bhi)
+ for line in self._plain_replace(a, alo, ahi, b, blo, bhi):
+ yield line
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
@@ -879,14 +872,10 @@ class Differ:
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
- if TRACE:
- self.results.append('*** best_ratio %s %s %s %s\n'
- % (best_ratio, best_i, best_j))
- self._dump('>', a, best_i, best_i+1)
- self._dump('<', b, best_j, best_j+1)
# pump out diffs from before the synch point
- self._fancy_helper(a, alo, best_i, b, blo, best_j)
+ for line in self._fancy_helper(a, alo, best_i, b, blo, best_j):
+ yield line
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
@@ -908,22 +897,28 @@ class Differ:
btags += ' ' * lb
else:
raise ValueError, 'unknown tag ' + `tag`
- self._qformat(aelt, belt, atags, btags)
+ for line in self._qformat(aelt, belt, atags, btags):
+ yield line
else:
# the synch pair is identical
- self.results.append(' ' + aelt)
+ yield ' ' + aelt
# pump out diffs from after the synch point
- self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
+ for line in self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi):
+ yield line
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
+ g = []
if alo < ahi:
if blo < bhi:
- self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
- self._dump('-', a, alo, ahi)
+ g = self._dump('-', a, alo, ahi)
elif blo < bhi:
- self._dump('+', b, blo, bhi)
+ g = self._dump('+', b, blo, bhi)
+
+ for line in g:
+ yield line
def _qformat(self, aline, bline, atags, btags):
r"""
@@ -949,13 +944,13 @@ class Differ:
atags = atags[common:].rstrip()
btags = btags[common:].rstrip()
- self.results.append("- " + aline)
+ yield "- " + aline
if atags:
- self.results.append("? %s%s\n" % ("\t" * common, atags))
+ yield "? %s%s\n" % ("\t" * common, atags)
- self.results.append("+ " + bline)
+ yield "+ " + bline
if btags:
- self.results.append("? %s%s\n" % ("\t" * common, btags))
+ yield "? %s%s\n" % ("\t" * common, btags)
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element. The result was cases like this:
@@ -1050,7 +1045,7 @@ def ndiff(a, b, linejunk=IS_LINE_JUNK, charjunk=IS_CHARACTER_JUNK):
def restore(delta, which):
r"""
- Return one of the two sequences that generated a delta.
+ Generate one of the two sequences that generated a delta.
Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
lines originating from file 1 or 2 (parameter `which`), stripping off line
@@ -1060,6 +1055,7 @@ def restore(delta, which):
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
... 'ore\ntree\nemu\n'.splitlines(1))
+ >>> diff = list(diff)
>>> print ''.join(restore(diff, 1)),
one
two
@@ -1075,11 +1071,9 @@ def restore(delta, which):
raise ValueError, ('unknown delta choice (must be 1 or 2): %r'
% which)
prefixes = (" ", tag)
- results = []
for line in delta:
if line[:2] in prefixes:
- results.append(line[2:])
- return results
+ yield line[2:]
def _test():
import doctest, difflib
diff --git a/Misc/NEWS b/Misc/NEWS
index 713eb17..cf8e3fc 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -30,7 +30,7 @@ Core
- In 2.2a3, __new__ would only see sequential arguments passed to the
type in a constructor call; __init__ would see both sequential and
- positional arguments. This made no sense whatsoever any more, so
+ keyword arguments. This made no sense whatsoever any more, so
now both __new__ and __init__ see all arguments.
- In 2.2a3, hash() applied to an instance of a subclass of str or unicode
@@ -54,6 +54,10 @@ Core
Library
+- difflib.ndiff(), difflib.restore() and difflib.Differ.compare() are
+ generators now. This restores the ability of Tools/scripts/ndiff.py to
+ start producing output before the entire comparison is complete.
+
- StringIO.StringIO instances and cStringIO.StringIO instances support
iteration just like file objects (i.e. their .readline() method is
called for each iteration until it returns an empty string).
@@ -124,10 +128,25 @@ New platforms
Tests
+- The "classic" standard tests, which work by comparing stdout to
+ an expected-output file under Lib/test/output/, no longer stop at
+ the first mismatch. Instead the test is run to completion, and a
+ variant of ndiff-style comparison is used to report all differences.
+ This is much easier to understand than the previous style of reporting.
+
+- The unittest-based standard tests now use regrtest's test_main()
+ convention, instead of running as a side-effect of merely being
+ imported. This allows these tests to be run in more natural and
+ flexible ways as unittests, outside the regrtest framework.
+
+- regrtest.py is much better integrated with unittest and doctest now,
+ especially in regard to reporting errors.
+
Windows
- Large file support now also works for files > 4GB, on filesystems
- that support it (NTFS under Windows 2000).
+ that support it (NTFS under Windows 2000). See "What's New in
+ Python 2.2a3" for more detail.
What's New in Python 2.2a3?
diff --git a/Tools/scripts/ndiff.py b/Tools/scripts/ndiff.py
index a5468f6..6f0f9a9 100755
--- a/Tools/scripts/ndiff.py
+++ b/Tools/scripts/ndiff.py
@@ -73,9 +73,8 @@ def fcompare(f1name, f2name):
a = f1.readlines(); f1.close()
b = f2.readlines(); f2.close()
-
- diff = difflib.ndiff(a, b)
- sys.stdout.writelines(diff)
+ for line in difflib.ndiff(a, b):
+ print line,
return 1