summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2009-01-02 21:26:45 (GMT)
committer Raymond Hettinger <python@rcn.com> 2009-01-02 21:26:45 (GMT)
commit 44e1581df399d5a5d609296a0001c155a152e22a (patch)
tree f026229db23549a77edd3012bf5d310d6c6fecff
parent 10b1ec98df5acc652cf10d0f77340b66ab5b8f38 (diff)
download cpython-44e1581df399d5a5d609296a0001c155a152e22a.zip
cpython-44e1581df399d5a5d609296a0001c155a152e22a.tar.gz
cpython-44e1581df399d5a5d609296a0001c155a152e22a.tar.bz2
Issue #4615. Document how to use itertools for de-duping.
-rw-r--r-- Doc/library/itertools.rst 24
-rw-r--r-- Lib/test/test_itertools.py 36
2 files changed, 60 insertions, 0 deletions
diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index e0a55bd..2bd18d0 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -687,3 +687,27 @@ which incur interpreter overhead.
return
indices[i:] = [indices[i] + 1] * (r - i)
yield tuple(pool[i] for i in indices)
+
+ def unique_everseen(iterable, key=None):
+ "List unique elements, preserving order. Remember all elements ever seen."
+ # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+ # unique_everseen('ABBCcAD', str.lower) --> A B C D
+ seen = set()  # elements (or their keys) that have already been yielded
+ seen_add = seen.add  # look the bound method up once, outside the loops
+ if key is None:  # no key function: compare the elements themselves
+ for element in iterable:
+ if element not in seen:
+ seen_add(element)
+ yield element
+ else:  # compare by key(element) but still yield the original element
+ for element in iterable:
+ k = key(element)
+ if k not in seen:
+ seen_add(k)
+ yield element
+
+ def unique_justseen(iterable, key=None):
+ "List unique elements, preserving order. Remember only the element just seen."
+ # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
+ # unique_justseen('ABBCcAD', str.lower) --> A B C A D
+ return imap(next, imap(itemgetter(1), groupby(iterable, key)))  # first item of each consecutive group
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 6912ac7..029498a 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -1277,6 +1277,30 @@ Samuele
... indices[i:] = [indices[i] + 1] * (r - i)
... yield tuple(pool[i] for i in indices)
+>>> def unique_everseen(iterable, key=None):
+... "List unique elements, preserving order. Remember all elements ever seen."
+... # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+... # unique_everseen('ABBCcAD', str.lower) --> A B C D
+... seen = set()  # elements (or their keys) that have already been yielded
+... seen_add = seen.add  # look the bound method up once, outside the loops
+... if key is None:  # no key function: compare the elements themselves
+... for element in iterable:
+... if element not in seen:
+... seen_add(element)
+... yield element
+... else:  # compare by key(element) but still yield the original element
+... for element in iterable:
+... k = key(element)
+... if k not in seen:
+... seen_add(k)
+... yield element
+
+>>> def unique_justseen(iterable, key=None):
+... "List unique elements, preserving order. Remember only the element just seen."
+... # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
+... # unique_justseen('ABBCcAD', str.lower) --> A B C A D
+... return imap(next, imap(itemgetter(1), groupby(iterable, key)))  # first item of each consecutive group
+
This is not part of the examples but it tests to make sure the definitions
perform as purported.
@@ -1339,6 +1363,18 @@ perform as purported.
>>> list(combinations_with_replacement('abc', 2))
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]
+>>> list(unique_everseen('AAAABBBCCDAABBB'))  # later repeats of A and B are dropped
+['A', 'B', 'C', 'D']
+
+>>> list(unique_everseen('ABBCcAD', str.lower))  # 'c' shares the key 'c' with 'C'
+['A', 'B', 'C', 'D']
+
+>>> list(unique_justseen('AAAABBBCCDAABBB'))  # only adjacent repeats collapse
+['A', 'B', 'C', 'D', 'A', 'B']
+
+>>> list(unique_justseen('ABBCcAD', str.lower))  # 'Cc' is one run; the later 'A' is kept
+['A', 'B', 'C', 'A', 'D']
+
"""
__test__ = {'libreftest' : libreftest}