summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Terry Reedy <tjreedy@udel.edu> 2010-12-03 18:57:42 (GMT)
committer Terry Reedy <tjreedy@udel.edu> 2010-12-03 18:57:42 (GMT)
commit 74a7c67db15d617a09d71d5c368ddded72f0362d (patch)
tree adafd2bef5e0c23910ecdb3804d4e488892ba973
parent f3b68b3f981b8baa81e3e838ab921d2e4362ae33 (diff)
downloadcpython-74a7c67db15d617a09d71d5c368ddded72f0362d.zip
cpython-74a7c67db15d617a09d71d5c368ddded72f0362d.tar.gz
cpython-74a7c67db15d617a09d71d5c368ddded72f0362d.tar.bz2
-rw-r--r--Doc/library/difflib.rst12
-rw-r--r--Lib/difflib.py8
2 files changed, 17 insertions, 3 deletions
diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst
index 89d69ac..bfe5e58 100644
--- a/Doc/library/difflib.rst
+++ b/Doc/library/difflib.rst
@@ -358,6 +358,16 @@ The :class:`SequenceMatcher` class has this constructor:
.. versionadded:: 3.2
The *autojunk* parameter.
+ SequenceMatcher objects get three data attributes: *bjunk* is the
+ set of elements of b for which *isjunk* is True; *bpopular* is the set of
+ non-junk elements considered popular by the heuristic (if it is not disabled);
+ *b2j* is a dict mapping the remaining elements of b to a list of positions where
+ they occur. All three are reset whenever *b* is reset with :meth:`set_seqs`
+ or :meth:`set_seq2`.
+
+.. versionadded:: 3.2
+ The *bjunk* and *bpopular* attributes.
+
:class:`SequenceMatcher` objects have the following methods:
@@ -538,7 +548,7 @@ different results due to differing levels of approximation, although
SequenceMatcher Examples
------------------------
-This example compares two strings, considering blanks to be "junk:"
+This example compares two strings, considering blanks to be "junk":
>>> s = SequenceMatcher(lambda x: x == " ",
... "private Thread currentThread;",
diff --git a/Lib/difflib.py b/Lib/difflib.py
index fa8c287..e31259a 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -213,6 +213,10 @@ class SequenceMatcher:
# (at least 200 elements) and x accounts for more than 1 + 1% of
# its elements (when autojunk is enabled).
# DOES NOT WORK for x in a!
+ # bjunk
+ # the items in b for which isjunk is True.
+ # bpopular
+ # nonjunk items in b treated as junk by the heuristic (if used).
self.isjunk = isjunk
self.a = self.b = None
@@ -321,7 +325,7 @@ class SequenceMatcher:
indices.append(i)
# Purge junk elements
- junk = set()
+ self.bjunk = junk = set()
isjunk = self.isjunk
if isjunk:
for elt in list(b2j.keys()): # using list() since b2j is modified
@@ -330,7 +334,7 @@ class SequenceMatcher:
del b2j[elt]
# Purge popular elements that are not junk
- popular = set()
+ self.bpopular = popular = set()
n = len(b)
if self.autojunk and n >= 200:
ntest = n // 100 + 1