summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tim Peters <tim.peters@gmail.com> 2001-10-26 05:06:50 (GMT)
committer Tim Peters <tim.peters@gmail.com> 2001-10-26 05:06:50 (GMT)
commit 1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0 (patch)
tree d764262205e36bcc61e7cb42895236fdca67c9d3
parent b016da3b8391b7401afd95f2c90f5073976c475b (diff)
download cpython-1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0.zip
cpython-1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0.tar.gz
cpython-1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0.tar.bz2
Generalize dictionary() to accept a sequence of 2-sequences.

At the outer level, the iterator protocol is used for memory-efficiency
(the outer sequence may be very large if fully materialized); at the inner
level, PySequence_Fast() is used for time-efficiency (these should always
be sequences of length 2).

dictobject.c, new functions PyDict_{Merge,Update}FromSeq2. These are
wholly analogous to PyDict_{Merge,Update}, but process a
sequence-of-2-sequences argument instead of a mapping object. For now, I
left these functions file static, so no corresponding doc changes. It's
tempting to change dict.update() to allow a sequence-of-2-seqs argument
too.

Also changed the name of dictionary's keyword argument from "mapping" to
"x". Got a better name? "mapping_or_sequence_of_pairs" isn't attractive,
although more so than "mosop" <wink>.

abstract.h, abstract.tex: Added new PySequence_Fast_GET_SIZE function,
much faster than going thru the all-purpose PySequence_Size.

libfuncs.tex:
- Document dictionary().
- Fiddle tuple() and list() to admit that their argument is optional.
- The long-winded repetitions of "a sequence, a container that supports
  iteration, or an iterator object" are getting to be a PITA. Many months
  ago I suggested factoring this out into "iterable object", where the
  definition of that could include being explicit about generators too
  (as is, I'm not sure a reader outside of PythonLabs could guess that
  "an iterator object" includes a generator call).
- Please check my curly braces -- I'm going blind <0.9 wink>.

abstract.c, PySequence_Tuple(): When PyObject_GetIter() fails, leave its
error msg alone now (the msg it produces has improved since
PySequence_Tuple was generalized to accept iterable objects, and
PySequence_Tuple was also stomping on the msg in cases it shouldn't have
even before PyObject_GetIter grew a better msg).
-rw-r--r-- Doc/api/abstract.tex 17
-rw-r--r-- Doc/lib/libfuncs.tex 26
-rw-r--r-- Include/abstract.h 12
-rw-r--r-- Lib/test/test_descr.py 51
-rw-r--r-- Misc/NEWS 9
-rw-r--r-- Objects/abstract.c 2
-rw-r--r-- Objects/dictobject.c 118
7 files changed, 199 insertions, 36 deletions
diff --git a/Doc/api/abstract.tex b/Doc/api/abstract.tex
index 8d271df..fae8475 100644
--- a/Doc/api/abstract.tex
+++ b/Doc/api/abstract.tex
@@ -125,7 +125,7 @@ for which they do not apply, they will raise a Python exception.
the Unicode string representation on success, \NULL{} on failure.
This is the equivalent of the Python expression
\samp{unistr(\var{o})}. Called by the
- \function{unistr()}\bifuncindex{unistr} built-in function.
+ \function{unistr()}\bifuncindex{unistr} built-in function.
\end{cfuncdesc}
\begin{cfuncdesc}{int}{PyObject_IsInstance}{PyObject *inst, PyObject *cls}
@@ -715,10 +715,17 @@ determination.
\begin{cfuncdesc}{PyObject*}{PySequence_Fast_GET_ITEM}{PyObject *o, int i}
Return the \var{i}th element of \var{o}, assuming that \var{o} was
- returned by \cfunction{PySequence_Fast()}, and that \var{i} is
- within bounds. The caller is expected to get the length of the
- sequence by calling \cfunction{PySequence_Size()} on \var{o}, since
- lists and tuples are guaranteed to always return their true length.
+ returned by \cfunction{PySequence_Fast()}, \var{o} is not \NULL{},
+ and that \var{i} is within bounds.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{PySequence_Fast_GET_SIZE}{PyObject *o}
+ Returns the length of \var{o}, assuming that \var{o} was
+ returned by \cfunction{PySequence_Fast()} and that \var{o} is
+ not \NULL{}. The size can also be gotten by calling
+ \cfunction{PySequence_Size()} on \var{o}, but
+ \cfunction{PySequence_Fast_GET_SIZE()} is faster because it can
+ assume \var{o} is a list or tuple.
\end{cfuncdesc}
diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex
index b19d4a6..e9baeb3 100644
--- a/Doc/lib/libfuncs.tex
+++ b/Doc/lib/libfuncs.tex
@@ -175,6 +175,28 @@ def my_import(name):
\code{del \var{x}.\var{foobar}}.
\end{funcdesc}
+\begin{funcdesc}{dictionary}{\optional{mapping-or-sequence}}
+ Return a new dictionary initialized from the optional argument.
+ If an argument is not specified, return a new empty dictionary.
+ If the argument is a mapping object, return a dictionary mapping the
+ same keys to the same values as does the mapping object.
+ Else the argument must be a sequence, a container that supports
+ iteration, or an iterator object. The elements of the argument must
+ each also be of one of those kinds, and each must in turn contain
+ exactly two objects. The first is used as a key in the new dictionary,
+ and the second as the key's value. If a given key is seen more than
+ once, the last value associated with it is retained in the new
+ dictionary.
+ For example, these all return a dictionary equal to
+ \code{\{1: 2, 2: 3\}}:
+ \code{dictionary(\{1: 2, 2: 3\})},
+ \code{dictionary(\{1: 2, 2: 3\}.items())},
+ \code{dictionary(\{1: 2, 2: 3\}.iteritems())},
+ \code{dictionary(zip((1, 2), (2, 3)))},
+ \code{dictionary([[2, 3], [1, 2]])}, and
+ \code{dictionary([(i-1, i) for i in (2, 3)])}.
+\end{funcdesc}
+
\begin{funcdesc}{dir}{\optional{object}}
Without arguments, return the list of names in the current local
symbol table. With an argument, attempts to return a list of valid
@@ -472,7 +494,7 @@ def my_import(name):
may be a sequence (string, tuple or list) or a mapping (dictionary).
\end{funcdesc}
-\begin{funcdesc}{list}{sequence}
+\begin{funcdesc}{list}{\optional{sequence}}
Return a list whose items are the same and in the same order as
\var{sequence}'s items. \var{sequence} may be either a sequence, a
container that supports iteration, or an iterator object. If
@@ -726,7 +748,7 @@ def my_import(name):
printable string.
\end{funcdesc}
-\begin{funcdesc}{tuple}{sequence}
+\begin{funcdesc}{tuple}{\optional{sequence}}
Return a tuple whose items are the same and in the same order as
\var{sequence}'s items. \var{sequence} may be a sequence, a
container that supports iteration, or an iterator object.
diff --git a/Include/abstract.h b/Include/abstract.h
index d736efc..351149d 100644
--- a/Include/abstract.h
+++ b/Include/abstract.h
@@ -951,26 +951,30 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
DL_IMPORT(PyObject *) PySequence_List(PyObject *o);
-
/*
Returns the sequence, o, as a list on success, and NULL on failure.
This is equivalent to the Python expression: list(o)
*/
DL_IMPORT(PyObject *) PySequence_Fast(PyObject *o, const char* m);
-
/*
Returns the sequence, o, as a tuple, unless it's already a
tuple or list. Use PySequence_Fast_GET_ITEM to access the
- members of this list.
+ members of this list, and PySequence_Fast_GET_SIZE to get its length.
Returns NULL on failure. If the object does not support iteration,
raises a TypeError exception with m as the message text.
*/
+#define PySequence_Fast_GET_SIZE(o) \
+ (PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o))
+ /*
+ Return the size of o, assuming that o was returned by
+ PySequence_Fast and is not NULL.
+ */
+
#define PySequence_Fast_GET_ITEM(o, i)\
(PyList_Check(o) ? PyList_GET_ITEM(o, i) : PyTuple_GET_ITEM(o, i))
-
/*
Return the ith element of o, assuming that o was returned by
PySequence_Fast, and that i is within bounds.
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index 87f4f0f..230d6a1 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -178,15 +178,25 @@ def dict_constructor():
vereq(d, {})
d = dictionary({})
vereq(d, {})
- d = dictionary(mapping={})
+ d = dictionary(x={})
vereq(d, {})
d = dictionary({1: 2, 'a': 'b'})
vereq(d, {1: 2, 'a': 'b'})
+ vereq(d, dictionary(d.items()))
+ vereq(d, dictionary(x=d.iteritems()))
for badarg in 0, 0L, 0j, "0", [0], (0,):
try:
dictionary(badarg)
except TypeError:
pass
+ except ValueError:
+ if badarg == "0":
+ # It's a sequence, and its elements are also sequences (gotta
+ # love strings <wink>), but they aren't of length 2, so this
+ # one seemed better as a ValueError than a TypeError.
+ pass
+ else:
+ raise TestFailed("no TypeError from dictionary(%r)" % badarg)
else:
raise TestFailed("no TypeError from dictionary(%r)" % badarg)
try:
@@ -194,7 +204,7 @@ def dict_constructor():
except TypeError:
pass
else:
- raise TestFailed("no TypeError from dictionary(senseless={}")
+ raise TestFailed("no TypeError from dictionary(senseless={})")
try:
dictionary({}, {})
@@ -204,11 +214,9 @@ def dict_constructor():
raise TestFailed("no TypeError from dictionary({}, {})")
class Mapping:
+ # Lacks a .keys() method; will be added later.
dict = {1:2, 3:4, 'a':1j}
- def __getitem__(self, i):
- return self.dict[i]
-
try:
dictionary(Mapping())
except TypeError:
@@ -217,9 +225,36 @@ def dict_constructor():
raise TestFailed("no TypeError from dictionary(incomplete mapping)")
Mapping.keys = lambda self: self.dict.keys()
- d = dictionary(mapping=Mapping())
+ Mapping.__getitem__ = lambda self, i: self.dict[i]
+ d = dictionary(x=Mapping())
vereq(d, Mapping.dict)
+ # Init from sequence of iterable objects, each producing a 2-sequence.
+ class AddressBookEntry:
+ def __init__(self, first, last):
+ self.first = first
+ self.last = last
+ def __iter__(self):
+ return iter([self.first, self.last])
+
+ d = dictionary([AddressBookEntry('Tim', 'Warsaw'),
+ AddressBookEntry('Barry', 'Peters'),
+ AddressBookEntry('Tim', 'Peters'),
+ AddressBookEntry('Barry', 'Warsaw')])
+ vereq(d, {'Barry': 'Warsaw', 'Tim': 'Peters'})
+
+ d = dictionary(zip(range(4), range(1, 5)))
+ vereq(d, dictionary([(i, i+1) for i in range(4)]))
+
+ # Bad sequence lengths.
+ for bad in ['tooshort'], ['too', 'long', 'by 1']:
+ try:
+ dictionary(bad)
+ except ValueError:
+ pass
+ else:
+ raise TestFailed("no ValueError from dictionary(%r)" % bad)
+
def test_dir():
if verbose:
print "Testing dir() ..."
@@ -1830,7 +1865,7 @@ def keywords():
vereq(unicode(string='abc', errors='strict'), u'abc')
vereq(tuple(sequence=range(3)), (0, 1, 2))
vereq(list(sequence=(0, 1, 2)), range(3))
- vereq(dictionary(mapping={1: 2}), {1: 2})
+ vereq(dictionary(x={1: 2}), {1: 2})
for constructor in (int, float, long, complex, str, unicode,
tuple, list, dictionary, file):
@@ -2371,7 +2406,7 @@ def kwdargs():
vereq(f.__call__(a=42), 42)
a = []
list.__init__(a, sequence=[0, 1, 2])
- vereq(a, [0, 1, 2])
+ vereq(a, [0, 1, 2])
def test_main():
class_docstrings()
diff --git a/Misc/NEWS b/Misc/NEWS
index aa0ab81..e55b9de 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -4,6 +4,11 @@ XXX Planned XXX Release date: 14-Nov-2001
Type/class unification and new-style classes
+- dictionary() now accepts an iterable object producing 2-sequences.
+ For example, dictionary(d.items()) == d for any dictionary d. The
+ argument, and the elements of the argument, can be any iterable
+ objects.
+
- Methods of built-in types now properly check for keyword arguments
(formerly these were silently ignored). The only built-in methods
that take keyword arguments are __call__, __init__ and __new__.
@@ -31,6 +36,10 @@ Build
C API
+- New function PySequence_Fast_GET_SIZE() returns the size of a non-
+ NULL result from PySequence_Fast(), more quickly than calling
+ PySequence_Size().
+
New platforms
- Updated RISCOS port by Dietmar Schwertberger.
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 8a715c8..6b9201b 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1278,7 +1278,7 @@ PySequence_Tuple(PyObject *v)
/* Get iterator. */
it = PyObject_GetIter(v);
if (it == NULL)
- return type_error("tuple() argument must support iteration");
+ return NULL;
/* Guess result size and allocate space. */
n = PySequence_Size(v);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 829f76d..f901499 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -993,7 +993,89 @@ dict_update(PyObject *mp, PyObject *other)
/* Update unconditionally replaces existing items.
Merge has a 3rd argument 'override'; if set, it acts like Update,
- otherwise it leaves existing items unchanged. */
+ otherwise it leaves existing items unchanged.
+
+ PyDict_{Update,Merge} update/merge from a mapping object.
+
+ PyDict_{Update,Merge}FromSeq2 update/merge from any iterable object
+ producing iterable objects of length 2.
+*/
+
+static int
+PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
+{
+ PyObject *it; /* iter(seq2) */
+ int i; /* index into seq2 of current element */
+ PyObject *item; /* seq2[i] */
+ PyObject *fast; /* item as a 2-tuple or 2-list */
+
+ assert(d != NULL);
+ assert(PyDict_Check(d));
+ assert(seq2 != NULL);
+
+ it = PyObject_GetIter(seq2);
+ if (it == NULL)
+ return -1;
+
+ for (i = 0; ; ++i) {
+ PyObject *key, *value;
+ int n;
+
+ fast = NULL;
+ item = PyIter_Next(it);
+ if (item == NULL) {
+ if (PyErr_Occurred())
+ goto Fail;
+ break;
+ }
+
+ /* Convert item to sequence, and verify length 2. */
+ fast = PySequence_Fast(item, "");
+ if (fast == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
+ PyErr_Format(PyExc_TypeError,
+ "cannot convert dictionary update "
+ "sequence element #%d to a sequence",
+ i);
+ goto Fail;
+ }
+ n = PySequence_Fast_GET_SIZE(fast);
+ if (n != 2) {
+ PyErr_Format(PyExc_ValueError,
+ "dictionary update sequence element #%d "
+ "has length %d; 2 is required",
+ i, n);
+ goto Fail;
+ }
+
+ /* Update/merge with this (key, value) pair. */
+ key = PySequence_Fast_GET_ITEM(fast, 0);
+ value = PySequence_Fast_GET_ITEM(fast, 1);
+ if (override || PyDict_GetItem(d, key) == NULL) {
+ int status = PyDict_SetItem(d, key, value);
+ if (status < 0)
+ goto Fail;
+ }
+ Py_DECREF(fast);
+ Py_DECREF(item);
+ }
+
+ i = 0;
+ goto Return;
+Fail:
+ Py_XDECREF(item);
+ Py_XDECREF(fast);
+ i = -1;
+Return:
+ Py_DECREF(it);
+ return i;
+}
+
+static int
+PyDict_UpdateFromSeq2(PyObject *d, PyObject *seq2)
+{
+ return PyDict_MergeFromSeq2(d, seq2, 1);
+}
int
PyDict_Update(PyObject *a, PyObject *b)
@@ -1699,23 +1781,20 @@ static int
dict_init(PyObject *self, PyObject *args, PyObject *kwds)
{
PyObject *arg = NULL;
- static char *kwlist[] = {"mapping", 0};
+ static char *kwlist[] = {"x", 0};
+ int result = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:dictionary",
kwlist, &arg))
- return -1;
- if (arg != NULL) {
- if (PyDict_Merge(self, arg, 1) < 0) {
- /* An error like "AttributeError: keys" is too
- cryptic in this context. */
- if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
- PyErr_SetString(PyExc_TypeError,
- "argument must be of a mapping type");
- }
- return -1;
- }
+ result = -1;
+
+ else if (arg != NULL) {
+ if (PyObject_HasAttrString(arg, "keys"))
+ result = PyDict_Merge(self, arg, 1);
+ else
+ result = PyDict_MergeFromSeq2(self, arg, 1);
}
- return 0;
+ return result;
}
static PyObject *
@@ -1725,8 +1804,15 @@ dict_iter(dictobject *dict)
}
static char dictionary_doc[] =
-"dictionary() -> new empty dictionary\n"
-"dictionary(mapping) -> new dict initialized from mapping's key+value pairs";
+"dictionary() -> new empty dictionary.\n"
+"dictionary(mapping) -> new dict initialized from a mapping object's\n"
+" (key, value) pairs.\n"
+"dictionary(seq) -> new dict initialized from the 2-element elements of\n"
+" a sequence; for example, from mapping.items(). seq must be an\n"
+" iterable object, producing iterable objects each producing exactly\n"
+" two objects, the first of which is used as a key and the second as\n"
+" its value. If a given key is seen more than once, the dict retains\n"
+" the last value associated with it.";
PyTypeObject PyDict_Type = {
PyObject_HEAD_INIT(&PyType_Type)