diff options
author | Terry Reedy <tjreedy@udel.edu> | 2010-12-03 18:57:42 (GMT) |
---|---|---|
committer | Terry Reedy <tjreedy@udel.edu> | 2010-12-03 18:57:42 (GMT) |
commit | 74a7c67db15d617a09d71d5c368ddded72f0362d (patch) | |
tree | adafd2bef5e0c23910ecdb3804d4e488892ba973 | |
parent | f3b68b3f981b8baa81e3e838ab921d2e4362ae33 (diff) | |
download | cpython-74a7c67db15d617a09d71d5c368ddded72f0362d.zip cpython-74a7c67db15d617a09d71d5c368ddded72f0362d.tar.gz cpython-74a7c67db15d617a09d71d5c368ddded72f0362d.tar.bz2 |
-rw-r--r-- | Doc/library/difflib.rst | 12 | ||||
-rw-r--r-- | Lib/difflib.py | 8 |
2 files changed, 17 insertions, 3 deletions
diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst index 89d69ac..bfe5e58 100644 --- a/Doc/library/difflib.rst +++ b/Doc/library/difflib.rst @@ -358,6 +358,16 @@ The :class:`SequenceMatcher` class has this constructor: .. versionadded:: 3.2 The *autojunk* parameter. + SequenceMatcher objects get three data attributes: *bjunk* is the + set of elements of b for which *isjunk* is True; *bpopular* is the set of non- + junk elements considered popular by the heuristic (if it is not disabled); + *b2j* is a dict mapping the remaining elements of b to a list of positions where + they occur. All three are reset whenever *b* is reset with :meth:`set_seqs` + or :meth:`set_seq2`. + +.. versionadded:: 3.2 + The *bjunk* and *bpopular* attributes. + :class:`SequenceMatcher` objects have the following methods: @@ -538,7 +548,7 @@ different results due to differing levels of approximation, although SequenceMatcher Examples ------------------------ -This example compares two strings, considering blanks to be "junk:" +This example compares two strings, considering blanks to be "junk": >>> s = SequenceMatcher(lambda x: x == " ", ... "private Thread currentThread;", diff --git a/Lib/difflib.py b/Lib/difflib.py index fa8c287..e31259a 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -213,6 +213,10 @@ class SequenceMatcher: # (at least 200 elements) and x accounts for more than 1 + 1% of # its elements (when autojunk is enabled). # DOES NOT WORK for x in a! + # bjunk + # the items in b for which isjunk is True. + # bpopular + # nonjunk items in b treated as junk by the heuristic (if used). self.isjunk = isjunk self.a = self.b = None @@ -321,7 +325,7 @@ class SequenceMatcher: indices.append(i) # Purge junk elements - junk = set() + self.bjunk = junk = set() isjunk = self.isjunk if isjunk: for elt in list(b2j.keys()): # using list() since b2j is modified @@ -330,7 +334,7 @@ class SequenceMatcher: del b2j[elt] # Purge popular elements that are not junk - popular = set() + self.bpopular = popular = set() n = len(b) if self.autojunk and n >= 200: ntest = n // 100 + 1 |