From 09638c16d842c77c7b9f7c0f339508c0b2a40feb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 13 Jun 2002 19:17:46 +0000 Subject: Hopefully this addresses the remaining issues of SF bugs 459235 and 473985. Through a subtle rearrangement of some members in the etype struct (!), mapping methods are now preferred over sequence methods, which is necessary to support str.__getitem__("hello", slice(4)) etc. --- Lib/test/test_descr.py | 46 ++++++++++++++++++++++++++++++++++++ Objects/typeobject.c | 63 +++++++++++++++++++++++++++++++------------------- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index b83ace8..1168106 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3099,6 +3099,51 @@ def copy_setstate(): vereq(b.foo, 24) vereq(b.getfoo(), 24) +def slices(): + if verbose: + print "Testing cases with slices and overridden __getitem__ ..." + # Strings + vereq("hello"[:4], "hell") + vereq("hello"[slice(4)], "hell") + vereq(str.__getitem__("hello", slice(4)), "hell") + class S(str): + def __getitem__(self, x): + return str.__getitem__(self, x) + vereq(S("hello")[:4], "hell") + vereq(S("hello")[slice(4)], "hell") + vereq(S("hello").__getitem__(slice(4)), "hell") + # Tuples + vereq((1,2,3)[:2], (1,2)) + vereq((1,2,3)[slice(2)], (1,2)) + vereq(tuple.__getitem__((1,2,3), slice(2)), (1,2)) + class T(tuple): + def __getitem__(self, x): + return tuple.__getitem__(self, x) + vereq(T((1,2,3))[:2], (1,2)) + vereq(T((1,2,3))[slice(2)], (1,2)) + vereq(T((1,2,3)).__getitem__(slice(2)), (1,2)) + # Lists + vereq([1,2,3][:2], [1,2]) + vereq([1,2,3][slice(2)], [1,2]) + vereq(list.__getitem__([1,2,3], slice(2)), [1,2]) + class L(list): + def __getitem__(self, x): + return list.__getitem__(self, x) + vereq(L([1,2,3])[:2], [1,2]) + vereq(L([1,2,3])[slice(2)], [1,2]) + vereq(L([1,2,3]).__getitem__(slice(2)), [1,2]) + # Now do lists and __setitem__ + a = L([1,2,3]) + a[slice(1, 3)] = [3,2] + vereq(a, 
[1,3,2]) + a[slice(0, 2, 1)] = [3,1] + vereq(a, [3,1,2]) + a.__setitem__(slice(1, 3), [2,1]) + vereq(a, [3,2,1]) + a.__setitem__(slice(0, 2, 1), [2,3]) + vereq(a, [2,3,1]) + + def do_this_first(): if verbose: print "Testing SF bug 551412 ..." @@ -3182,6 +3227,7 @@ def test_main(): docdescriptor() string_exceptions() copy_setstate() + slices() if verbose: print "All OK" if __name__ == "__main__": diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 49c7d52..26222fa 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8,10 +8,16 @@ /* The *real* layout of a type object when allocated on the heap */ /* XXX Should we publish this in a header file? */ typedef struct { + /* Note: there's a dependency on the order of these members + in slotptr() below. */ PyTypeObject type; PyNumberMethods as_number; - PySequenceMethods as_sequence; PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + See add_operators() below. */ PyBufferProcs as_buffer; PyObject *name, *slots; PyMemberDef members[1]; @@ -3870,16 +3876,17 @@ slotptr(PyTypeObject *type, int offset) { char *ptr; + /* Note: this depends on the order of the members of etype! 
*/ assert(offset >= 0); assert(offset < offsetof(etype, as_buffer)); - if (offset >= offsetof(etype, as_mapping)) { - ptr = (void *)type->tp_as_mapping; - offset -= offsetof(etype, as_mapping); - } - else if (offset >= offsetof(etype, as_sequence)) { + if (offset >= offsetof(etype, as_sequence)) { ptr = (void *)type->tp_as_sequence; offset -= offsetof(etype, as_sequence); } + else if (offset >= offsetof(etype, as_mapping)) { + ptr = (void *)type->tp_as_mapping; + offset -= offsetof(etype, as_mapping); + } else if (offset >= offsetof(etype, as_number)) { ptr = (void *)type->tp_as_number; offset -= offsetof(etype, as_number); @@ -4113,24 +4120,32 @@ fixup_slot_dispatchers(PyTypeObject *type) /* This function is called by PyType_Ready() to populate the type's dictionary with method descriptors for function slots. For each - function slot (like tp_repr) that's defined in the type, one or - more corresponding descriptors are added in the type's tp_dict - dictionary under the appropriate name (like __repr__). Some - function slots cause more than one descriptor to be added (for - example, the nb_add slot adds both __add__ and __radd__ - descriptors) and some function slots compete for the same - descriptor (for example both sq_item and mp_subscript generate a - __getitem__ descriptor). This only adds new descriptors and - doesn't overwrite entries in tp_dict that were previously - defined. The descriptors contain a reference to the C function - they must call, so that it's safe if they are copied into a - subtype's __dict__ and the subtype has a different C function in - its slot -- calling the method defined by the descriptor will call - the C function that was used to create it, rather than the C - function present in the slot when it is called. (This is important - because a subtype may have a C function in the slot that calls the - method from the dictionary, and we want to avoid infinite recursion - here.) 
*/ + function slot (like tp_repr) that's defined in the type, one or more + corresponding descriptors are added in the type's tp_dict dictionary + under the appropriate name (like __repr__). Some function slots + cause more than one descriptor to be added (for example, the nb_add + slot adds both __add__ and __radd__ descriptors) and some function + slots compete for the same descriptor (for example both sq_item and + mp_subscript generate a __getitem__ descriptor). + + In the latter case, the first slotdef entry encountered wins. Since + slotdef entries are sorted by the offset of the slot in the etype + struct, this gives us some control over disambiguating between + competing slots: the members of struct etype are listed from most + general to least general, so the most general slot is preferred. In + particular, because as_mapping comes before as_sequence, for a type + that defines both mp_subscript and sq_item, mp_subscript wins. + + This only adds new descriptors and doesn't overwrite entries in + tp_dict that were previously defined. The descriptors contain a + reference to the C function they must call, so that it's safe if they + are copied into a subtype's __dict__ and the subtype has a different + C function in its slot -- calling the method defined by the + descriptor will call the C function that was used to create it, + rather than the C function present in the slot when it is called. + (This is important because a subtype may have a C function in the + slot that calls the method from the dictionary, and we want to avoid + infinite recursion here.) */ static int add_operators(PyTypeObject *type) -- cgit v0.12