summaryrefslogtreecommitdiffstats
path: root/Doc/lib/libitertools.tex
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/lib/libitertools.tex')
-rw-r--r--Doc/lib/libitertools.tex575
1 files changed, 0 insertions, 575 deletions
diff --git a/Doc/lib/libitertools.tex b/Doc/lib/libitertools.tex
deleted file mode 100644
index b4784a8..0000000
--- a/Doc/lib/libitertools.tex
+++ /dev/null
@@ -1,575 +0,0 @@
-\section{\module{itertools} ---
- Functions creating iterators for efficient looping}
-
-\declaremodule{standard}{itertools}
-\modulesynopsis{Functions creating iterators for efficient looping.}
-\moduleauthor{Raymond Hettinger}{python@rcn.com}
-\sectionauthor{Raymond Hettinger}{python@rcn.com}
-\versionadded{2.3}
-
-
-This module implements a number of iterator building blocks inspired
-by constructs from the Haskell and SML programming languages. Each
-has been recast in a form suitable for Python.
-
-The module standardizes a core set of fast, memory efficient tools
-that are useful by themselves or in combination. Standardization helps
-avoid the readability and reliability problems which arise when many
-different individuals create their own slightly varying implementations,
-each with their own quirks and naming conventions.
-
-The tools are designed to combine readily with one another. This makes
-it easy to construct more specialized tools succinctly and efficiently
-in pure Python.
-
-For instance, SML provides a tabulation tool: \code{tabulate(f)}
-which produces a sequence \code{f(0), f(1), ...}. This toolbox
-provides \function{imap()} and \function{count()} which can be combined
-to form \code{imap(f, count())} and produce an equivalent result.
-
-Likewise, the functional tools are designed to work well with the
-high-speed functions provided by the \refmodule{operator} module.
-
-The module author welcomes suggestions for other basic building blocks
-to be added to future versions of the module.
-
-Whether cast in pure python form or compiled code, tools that use iterators
-are more memory efficient (and faster) than their list based counterparts.
-Adopting the principles of just-in-time manufacturing, they create
-data when and where needed instead of consuming memory with the
-computer equivalent of ``inventory''.
-
-The performance advantage of iterators becomes more acute as the number
-of elements increases -- at some point, lists grow large enough to
-severely impact memory cache performance and start running slowly.
-
-\begin{seealso}
- \seetext{The Standard ML Basis Library,
- \citetitle[http://www.standardml.org/Basis/]
- {The Standard ML Basis Library}.}
-
- \seetext{Haskell, A Purely Functional Language,
- \citetitle[http://www.haskell.org/definition/]
- {Definition of Haskell and the Standard Libraries}.}
-\end{seealso}
-
-
-\subsection{Itertool functions \label{itertools-functions}}
-
-The following module functions all construct and return iterators.
-Some provide streams of infinite length, so they should only be accessed
-by functions or loops that truncate the stream.
-
-\begin{funcdesc}{chain}{*iterables}
- Make an iterator that returns elements from the first iterable until
- it is exhausted, then proceeds to the next iterable, until all of the
- iterables are exhausted. Used for treating consecutive sequences as
- a single sequence. Equivalent to:
-
- \begin{verbatim}
- def chain(*iterables):
- for it in iterables:
- for element in it:
- yield element
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{count}{\optional{n}}
- Make an iterator that returns consecutive integers starting with \var{n}.
- If not specified \var{n} defaults to zero.
- Does not currently support python long integers. Often used as an
- argument to \function{imap()} to generate consecutive data points.
- Also, used with \function{izip()} to add sequence numbers. Equivalent to:
-
- \begin{verbatim}
- def count(n=0):
- while True:
- yield n
- n += 1
- \end{verbatim}
-
- Note, \function{count()} does not check for overflow and will return
- negative numbers after exceeding \code{sys.maxint}. This behavior
- may change in the future.
-\end{funcdesc}
-
-\begin{funcdesc}{cycle}{iterable}
- Make an iterator returning elements from the iterable and saving a
- copy of each. When the iterable is exhausted, return elements from
- the saved copy. Repeats indefinitely. Equivalent to:
-
- \begin{verbatim}
- def cycle(iterable):
- saved = []
- for element in iterable:
- yield element
- saved.append(element)
- while saved:
- for element in saved:
- yield element
- \end{verbatim}
-
- Note, this member of the toolkit may require significant
- auxiliary storage (depending on the length of the iterable).
-\end{funcdesc}
-
-\begin{funcdesc}{dropwhile}{predicate, iterable}
- Make an iterator that drops elements from the iterable as long as
- the predicate is true; afterwards, returns every element. Note,
- the iterator does not produce \emph{any} output until the predicate
- first becomes false, so it may have a lengthy start-up time. Equivalent to:
-
- \begin{verbatim}
- def dropwhile(predicate, iterable):
- iterable = iter(iterable)
- for x in iterable:
- if not predicate(x):
- yield x
- break
- for x in iterable:
- yield x
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{groupby}{iterable\optional{, key}}
- Make an iterator that returns consecutive keys and groups from the
- \var{iterable}. The \var{key} is a function computing a key value for each
- element. If not specified or is \code{None}, \var{key} defaults to an
- identity function and returns the element unchanged. Generally, the
- iterable needs to already be sorted on the same key function.
-
- The operation of \function{groupby()} is similar to the \code{uniq} filter
- in \UNIX{}. It generates a break or new group every time the value
- of the key function changes (which is why it is usually necessary
- to have sorted the data using the same key function). That behavior
- differs from SQL's GROUP BY which aggregates common elements regardless
- of their input order.
-
- The returned group is itself an iterator that shares the underlying
- iterable with \function{groupby()}. Because the source is shared, when
- the \function{groupby} object is advanced, the previous group is no
- longer visible. So, if that data is needed later, it should be stored
- as a list:
-
- \begin{verbatim}
- groups = []
- uniquekeys = []
- data = sorted(data, key=keyfunc)
- for k, g in groupby(data, keyfunc):
- groups.append(list(g)) # Store group iterator as a list
- uniquekeys.append(k)
- \end{verbatim}
-
- \function{groupby()} is equivalent to:
-
- \begin{verbatim}
- class groupby(object):
- def __init__(self, iterable, key=None):
- if key is None:
- key = lambda x: x
- self.keyfunc = key
- self.it = iter(iterable)
- self.tgtkey = self.currkey = self.currvalue = []
- def __iter__(self):
- return self
- def __next__(self):
- while self.currkey == self.tgtkey:
- self.currvalue = next(self.it) # Exit on StopIteration
- self.currkey = self.keyfunc(self.currvalue)
- self.tgtkey = self.currkey
- return (self.currkey, self._grouper(self.tgtkey))
- def _grouper(self, tgtkey):
- while self.currkey == tgtkey:
- yield self.currvalue
- self.currvalue = next(self.it) # Exit on StopIteration
- self.currkey = self.keyfunc(self.currvalue)
- \end{verbatim}
- \versionadded{2.4}
-\end{funcdesc}
-
-\begin{funcdesc}{ifilter}{predicate, iterable}
- Make an iterator that filters elements from iterable returning only
- those for which the predicate is \code{True}.
- If \var{predicate} is \code{None}, return the items that are true.
- Equivalent to:
-
- \begin{verbatim}
- def ifilter(predicate, iterable):
- if predicate is None:
- predicate = bool
- for x in iterable:
- if predicate(x):
- yield x
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{ifilterfalse}{predicate, iterable}
- Make an iterator that filters elements from iterable returning only
- those for which the predicate is \code{False}.
- If \var{predicate} is \code{None}, return the items that are false.
- Equivalent to:
-
- \begin{verbatim}
- def ifilterfalse(predicate, iterable):
- if predicate is None:
- predicate = bool
- for x in iterable:
- if not predicate(x):
- yield x
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{imap}{function, *iterables}
- Make an iterator that computes the function using arguments from
- each of the iterables. If \var{function} is set to \code{None}, then
- \function{imap()} returns the arguments as a tuple. Like
- \function{map()} but stops when the shortest iterable is exhausted
- instead of filling in \code{None} for shorter iterables. The reason
- for the difference is that infinite iterator arguments are typically
- an error for \function{map()} (because the output is fully evaluated)
- but represent a common and useful way of supplying arguments to
- \function{imap()}.
- Equivalent to:
-
- \begin{verbatim}
- def imap(function, *iterables):
- iterables = map(iter, iterables)
- while True:
- args = [next(i) for i in iterables]
- if function is None:
- yield tuple(args)
- else:
- yield function(*args)
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{islice}{iterable, \optional{start,} stop \optional{, step}}
- Make an iterator that returns selected elements from the iterable.
- If \var{start} is non-zero, then elements from the iterable are skipped
- until start is reached. Afterward, elements are returned consecutively
- unless \var{step} is set higher than one which results in items being
- skipped. If \var{stop} is \code{None}, then iteration continues until
- the iterator is exhausted, if at all; otherwise, it stops at the specified
- position. Unlike regular slicing,
- \function{islice()} does not support negative values for \var{start},
- \var{stop}, or \var{step}. Can be used to extract related fields
- from data where the internal structure has been flattened (for
- example, a multi-line report may list a name field on every
- third line). Equivalent to:
-
- \begin{verbatim}
- def islice(iterable, *args):
- s = slice(*args)
- it = iter(range(s.start or 0, s.stop or sys.maxint, s.step or 1))
- nexti = next(it)
- for i, element in enumerate(iterable):
- if i == nexti:
- yield element
- nexti = next(it)
- \end{verbatim}
-
- If \var{start} is \code{None}, then iteration starts at zero.
- If \var{step} is \code{None}, then the step defaults to one.
- \versionchanged[accept \code{None} values for default \var{start} and
- \var{step}]{2.5}
-\end{funcdesc}
-
-\begin{funcdesc}{izip}{*iterables}
- Make an iterator that aggregates elements from each of the iterables.
- Like \function{zip()} except that it returns an iterator instead of
- a list. Used for lock-step iteration over several iterables at a
- time. Equivalent to:
-
- \begin{verbatim}
- def izip(*iterables):
- iterables = map(iter, iterables)
- while iterables:
- result = [next(it) for it in iterables]
- yield tuple(result)
- \end{verbatim}
-
- \versionchanged[When no iterables are specified, returns a zero length
- iterator instead of raising a \exception{TypeError}
- exception]{2.4}
-
- Note, the left-to-right evaluation order of the iterables is guaranteed.
- This makes possible an idiom for clustering a data series into n-length
- groups using \samp{izip(*[iter(s)]*n)}. For data that doesn't fit
- n-length groups exactly, the last tuple can be pre-padded with fill
- values using \samp{izip(*[chain(s, [None]*(n-1))]*n)}.
-
- Note, when \function{izip()} is used with unequal length inputs, subsequent
- iteration over the longer iterables cannot reliably be continued after
- \function{izip()} terminates. Potentially, up to one entry will be missing
- from each of the left-over iterables. This occurs because a value is fetched
- from each iterator in-turn, but the process ends when one of the iterators
- terminates. This leaves the last fetched values in limbo (they cannot be
- returned in a final, incomplete tuple and they are cannot be pushed back
- into the iterator for retrieval with \code{next(it)}). In general,
- \function{izip()} should only be used with unequal length inputs when you
- don't care about trailing, unmatched values from the longer iterables.
-\end{funcdesc}
-
-\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
- Make an iterator that aggregates elements from each of the iterables.
- If the iterables are of uneven length, missing values are filled-in
- with \var{fillvalue}. Iteration continues until the longest iterable
- is exhausted. Equivalent to:
-
- \begin{verbatim}
- def izip_longest(*args, **kwds):
- fillvalue = kwds.get('fillvalue')
- def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
- yield counter() # yields the fillvalue, or raises IndexError
- fillers = repeat(fillvalue)
- iters = [chain(it, sentinel(), fillers) for it in args]
- try:
- for tup in izip(*iters):
- yield tup
- except IndexError:
- pass
- \end{verbatim}
-
- If one of the iterables is potentially infinite, then the
- \function{izip_longest()} function should be wrapped with something
- that limits the number of calls (for example \function{islice()} or
- \function{take()}).
- \versionadded{2.6}
-\end{funcdesc}
-
-\begin{funcdesc}{repeat}{object\optional{, times}}
- Make an iterator that returns \var{object} over and over again.
- Runs indefinitely unless the \var{times} argument is specified.
- Used as argument to \function{imap()} for invariant parameters
- to the called function. Also used with \function{izip()} to create
- an invariant part of a tuple record. Equivalent to:
-
- \begin{verbatim}
- def repeat(object, times=None):
- if times is None:
- while True:
- yield object
- else:
- for i in range(times):
- yield object
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{starmap}{function, iterable}
- Make an iterator that computes the function using arguments tuples
- obtained from the iterable. Used instead of \function{imap()} when
- argument parameters are already grouped in tuples from a single iterable
- (the data has been ``pre-zipped''). The difference between
- \function{imap()} and \function{starmap()} parallels the distinction
- between \code{function(a,b)} and \code{function(*c)}.
- Equivalent to:
-
- \begin{verbatim}
- def starmap(function, iterable):
- iterable = iter(iterable)
- while True:
- yield function(*next(iterable))
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{takewhile}{predicate, iterable}
- Make an iterator that returns elements from the iterable as long as
- the predicate is true. Equivalent to:
-
- \begin{verbatim}
- def takewhile(predicate, iterable):
- for x in iterable:
- if predicate(x):
- yield x
- else:
- break
- \end{verbatim}
-\end{funcdesc}
-
-\begin{funcdesc}{tee}{iterable\optional{, n=2}}
- Return \var{n} independent iterators from a single iterable.
- The case where \code{n==2} is equivalent to:
-
- \begin{verbatim}
- def tee(iterable):
- def gen(next, data={}, cnt=[0]):
- for i in count():
- if i == cnt[0]:
- item = data[i] = next()
- cnt[0] += 1
- else:
- item = data.pop(i)
- yield item
- it = iter(iterable)
- return (gen(it.__next__), gen(it.__next__))
- \end{verbatim}
-
- Note, once \function{tee()} has made a split, the original \var{iterable}
- should not be used anywhere else; otherwise, the \var{iterable} could get
- advanced without the tee objects being informed.
-
- Note, this member of the toolkit may require significant auxiliary
- storage (depending on how much temporary data needs to be stored).
- In general, if one iterator is going to use most or all of the data before
- the other iterator, it is faster to use \function{list()} instead of
- \function{tee()}.
- \versionadded{2.4}
-\end{funcdesc}
-
-
-\subsection{Examples \label{itertools-example}}
-
-The following examples show common uses for each tool and
-demonstrate ways they can be combined.
-
-\begin{verbatim}
-
->>> amounts = [120.15, 764.05, 823.14]
->>> for checknum, amount in izip(count(1200), amounts):
-... print 'Check %d is for $%.2f' % (checknum, amount)
-...
-Check 1200 is for $120.15
-Check 1201 is for $764.05
-Check 1202 is for $823.14
-
->>> import operator
->>> for cube in imap(operator.pow, range(1,5), repeat(3)):
-... print cube
-...
-1
-8
-27
-64
-
->>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura',
- '', 'martin', '', 'walter', '', 'mark']
->>> for name in islice(reportlines, 3, None, 2):
-... print name.title()
-...
-Alex
-Laura
-Martin
-Walter
-Mark
-
-# Show a dictionary sorted and grouped by value
->>> from operator import itemgetter
->>> d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
->>> di = sorted(d.iteritems(), key=itemgetter(1))
->>> for k, g in groupby(di, key=itemgetter(1)):
-... print k, map(itemgetter(0), g)
-...
-1 ['a', 'c', 'e']
-2 ['b', 'd', 'f']
-3 ['g']
-
-# Find runs of consecutive numbers using groupby. The key to the solution
-# is differencing with a range so that consecutive numbers all appear in
-# same group.
->>> data = [ 1, 4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
->>> for k, g in groupby(enumerate(data), lambda t:t[0]-t[1]):
-... print map(operator.itemgetter(1), g)
-...
-[1]
-[4, 5, 6]
-[10]
-[15, 16, 17, 18]
-[22]
-[25, 26, 27, 28]
-
-\end{verbatim}
-
-
-\subsection{Recipes \label{itertools-recipes}}
-
-This section shows recipes for creating an extended toolset using the
-existing itertools as building blocks.
-
-The extended tools offer the same high performance as the underlying
-toolset. The superior memory performance is kept by processing elements one
-at a time rather than bringing the whole iterable into memory all at once.
-Code volume is kept small by linking the tools together in a functional style
-which helps eliminate temporary variables. High speed is retained by
-preferring ``vectorized'' building blocks over the use of for-loops and
-generators which incur interpreter overhead.
-
-
-\begin{verbatim}
-def take(n, seq):
- return list(islice(seq, n))
-
-def enumerate(iterable):
- return izip(count(), iterable)
-
-def tabulate(function):
- "Return function(0), function(1), ..."
- return imap(function, count())
-
-def iteritems(mapping):
- return izip(mapping.iterkeys(), mapping.itervalues())
-
-def nth(iterable, n):
- "Returns the nth item or raise StopIteration"
- return islice(iterable, n, None).next()
-
-def all(seq, pred=None):
- "Returns True if pred(x) is true for every element in the iterable"
- for elem in ifilterfalse(pred, seq):
- return False
- return True
-
-def any(seq, pred=None):
- "Returns True if pred(x) is true for at least one element in the iterable"
- for elem in ifilter(pred, seq):
- return True
- return False
-
-def no(seq, pred=None):
- "Returns True if pred(x) is false for every element in the iterable"
- for elem in ifilter(pred, seq):
- return False
- return True
-
-def quantify(seq, pred=None):
- "Count how many times the predicate is true in the sequence"
- return sum(imap(pred, seq))
-
-def padnone(seq):
- """Returns the sequence elements and then returns None indefinitely.
-
- Useful for emulating the behavior of the built-in map() function.
- """
- return chain(seq, repeat(None))
-
-def ncycles(seq, n):
- "Returns the sequence elements n times"
- return chain(*repeat(seq, n))
-
-def dotproduct(vec1, vec2):
- return sum(imap(operator.mul, vec1, vec2))
-
-def flatten(listOfLists):
- return list(chain(*listOfLists))
-
-def repeatfunc(func, times=None, *args):
- """Repeat calls to func with specified arguments.
-
- Example: repeatfunc(random.random)
- """
- if times is None:
- return starmap(func, repeat(args))
- else:
- return starmap(func, repeat(args, times))
-
-def pairwise(iterable):
- "s -> (s0,s1), (s1,s2), (s2, s3), ..."
- a, b = tee(iterable)
- next(b, None)
- return izip(a, b)
-
-def grouper(n, iterable, padvalue=None):
- "grouper(3, 'abcdefg', 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')"
- return izip(*[chain(iterable, repeat(padvalue, n-1))]*n)
-
-
-\end{verbatim}