From ce4a9da70535b4bb9048147b141f01004af2133d Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Mon, 21 Nov 2011 20:46:33 +0100
Subject: Issue #13411: memoryview objects are now hashable when the underlying object is hashable.

---
 Doc/library/stdtypes.rst    | 13 +++++++++++++
 Include/memoryobject.h      |  1 +
 Include/object.h            |  1 +
 Lib/test/test_memoryview.py | 27 +++++++++++++++++++++++++++
 Lib/test/test_sys.py        |  4 ++--
 Misc/NEWS                   |  3 +++
 Objects/bytesobject.c       | 21 +++++----------------
 Objects/memoryobject.c      | 34 +++++++++++++++++++++++++++++++++-
 Objects/object.c            | 15 +++++++++++++++
 9 files changed, 100 insertions(+), 19 deletions(-)

diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 3345258..cdb2a4a 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -2401,6 +2401,19 @@ copying. Memory is generally interpreted as simple bytes.
 
    Notice how the size of the memoryview object cannot be changed.
 
+   Memoryviews of hashable (read-only) types are also hashable and their
+   hash value matches the corresponding bytes object::
+
+      >>> v = memoryview(b'abcefg')
+      >>> hash(v) == hash(b'abcefg')
+      True
+      >>> hash(v[2:4]) == hash(b'ce')
+      True
+
+   .. versionchanged:: 3.3
+      Memoryview objects are now hashable.
+
+
    :class:`memoryview` has several methods:
 
    .. method:: tobytes()
diff --git a/Include/memoryobject.h b/Include/memoryobject.h
index 62ecbd6..aff5d99 100644
--- a/Include/memoryobject.h
+++ b/Include/memoryobject.h
@@ -69,6 +69,7 @@ PyAPI_FUNC(PyObject *) PyMemoryView_FromBuffer(Py_buffer *info);
 typedef struct {
     PyObject_HEAD
     Py_buffer view;
+    Py_hash_t hash;
 } PyMemoryViewObject;
 #endif
 
diff --git a/Include/object.h b/Include/object.h
index 648c9cb..b97f716 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -519,6 +519,7 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
 PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
+PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
 #endif
 
 /* Helper for passing objects to printf and the like */
diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py
index 0bfddd9..a5a0ca1 100644
--- a/Lib/test/test_memoryview.py
+++ b/Lib/test/test_memoryview.py
@@ -283,6 +283,33 @@ class AbstractMemoryTests:
         i = io.BytesIO(b'ZZZZ')
         self.assertRaises(TypeError, i.readinto, m)
 
+    def test_hash(self):
+        # Memoryviews of readonly (hashable) types are hashable, and they
+        # hash as the corresponding object.
+        tp = self.ro_type
+        if tp is None:
+            self.skipTest("no read-only type to test")
+        b = tp(self._source)
+        m = self._view(b)
+        self.assertEqual(hash(m), hash(b"abcdef"))
+        # Releasing the memoryview keeps the stored hash value (as with weakrefs)
+        m.release()
+        self.assertEqual(hash(m), hash(b"abcdef"))
+        # Hashing a memoryview for the first time after it is released
+        # results in an error (as with weakrefs).
+        m = self._view(b)
+        m.release()
+        self.assertRaises(ValueError, hash, m)
+
+    def test_hash_writable(self):
+        # Memoryviews of writable types are unhashable
+        tp = self.rw_type
+        if tp is None:
+            self.skipTest("no writable type to test")
+        b = tp(self._source)
+        m = self._view(b)
+        self.assertRaises(ValueError, hash, m)
+
 # Variations on source objects for the buffer: bytes-like objects, then arrays
 # with itemsize > 1.
 # NOTE: support for multi-dimensional objects is unimplemented.
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index c99f4d7..ba0b592 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -770,8 +770,8 @@ class SizeofTest(unittest.TestCase):
         check(int(PyLong_BASE), size(vh) + 2*self.longdigit)
         check(int(PyLong_BASE**2-1), size(vh) + 2*self.longdigit)
         check(int(PyLong_BASE**2), size(vh) + 3*self.longdigit)
-        # memory
-        check(memoryview(b''), size(h + 'PP2P2i7P'))
+        # memory (Py_buffer + hash value)
+        check(memoryview(b''), size(h + 'PP2P2i7P' + 'P'))
         # module
         check(unittest, size(h + '3P'))
         # None
diff --git a/Misc/NEWS b/Misc/NEWS
index d3b5d64..faa209d3 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #13411: memoryview objects are now hashable when the underlying
+  object is hashable.
+
 - Issue #13338: Handle all enumerations in _Py_ANNOTATE_MEMORY_ORDER
   to allow compiling extension modules with -Wswitch-enum on gcc.
   Initial patch by Floris Bruynooghe.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a89798a..88411b7 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -860,22 +860,11 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
 static Py_hash_t
 bytes_hash(PyBytesObject *a)
 {
-    register Py_ssize_t len;
-    register unsigned char *p;
-    register Py_uhash_t x;
-
-    if (a->ob_shash != -1)
-        return a->ob_shash;
-    len = Py_SIZE(a);
-    p = (unsigned char *) a->ob_sval;
-    x = (Py_uhash_t)*p << 7;
-    while (--len >= 0)
-        x = (1000003U*x) ^ (Py_uhash_t)*p++;
-    x ^= (Py_uhash_t)Py_SIZE(a);
-    if (x == -1)
-        x = -2;
-    a->ob_shash = x;
-    return x;
+    if (a->ob_shash == -1) {
+        /* Can't fail */
+        a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+    }
+    return a->ob_shash;
 }
 
 static PyObject*
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
index e0d1a89..295a742 100644
--- a/Objects/memoryobject.c
+++ b/Objects/memoryobject.c
@@ -84,6 +84,7 @@ PyMemoryView_FromBuffer(Py_buffer *info)
         PyObject_GC_New(PyMemoryViewObject, &PyMemoryView_Type);
     if (mview == NULL)
         return NULL;
+    mview->hash = -1;
     dup_buffer(&mview->view, info);
     /* NOTE: mview->view.obj should already have been incref'ed as
        part of PyBuffer_FillInfo(). */
@@ -512,6 +513,44 @@ memory_repr(PyMemoryViewObject *self)
         return PyUnicode_FromFormat("<memory at %p>", self);
 }
 
+/* tp_hash for memoryview objects.
+
+   The hash is computed on first use and cached in self->hash, where -1
+   means "not computed yet"; a cached value is returned without any of
+   the checks below.  Only read-only, one-dimensional, non-strided views
+   whose exporting object is itself hashable can be hashed; the value is
+   _Py_HashBytes() of the raw buffer.  Returns -1 with an exception set. */
+static Py_hash_t
+memory_hash(PyMemoryViewObject *self)
+{
+    if (self->hash == -1) {
+        Py_buffer *view = &self->view;
+        CHECK_RELEASED_INT(self);
+        if (view->ndim > 1) {
+            PyErr_SetString(PyExc_NotImplementedError,
+                            "can't hash multi-dimensional memoryview object");
+            return -1;
+        }
+        if (view->strides && view->strides[0] != view->itemsize) {
+            PyErr_SetString(PyExc_NotImplementedError,
+                            "can't hash strided memoryview object");
+            return -1;
+        }
+        if (!view->readonly) {
+            PyErr_SetString(PyExc_ValueError,
+                            "can't hash writable memoryview object");
+            return -1;
+        }
+        if (view->obj != NULL && PyObject_Hash(view->obj) == -1) {
+            /* Keep the original error message */
+            return -1;
+        }
+        /* Can't fail */
+        self->hash = _Py_HashBytes((unsigned char *) view->buf, view->len);
+    }
+    return self->hash;
+}
+
 /* Sequence methods */
 static Py_ssize_t
 memory_length(PyMemoryViewObject *self)
@@ -829,7 +868,7 @@ PyTypeObject PyMemoryView_Type = {
     0,                                          /* tp_as_number */
     &memory_as_sequence,                        /* tp_as_sequence */
     &memory_as_mapping,                         /* tp_as_mapping */
-    0,                                          /* tp_hash */
+    (hashfunc)memory_hash,                      /* tp_hash */
     0,                                          /* tp_call */
     0,                                          /* tp_str */
     PyObject_GenericGetAttr,                    /* tp_getattro */
diff --git a/Objects/object.c b/Objects/object.c
index 25e64e1..00f1716 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -744,6 +744,30 @@ _Py_HashPointer(void *p)
 }
 
+/* Hash the LEN bytes starting at P.  bytes_hash() and memory_hash() both
+   go through this function so that buffers with equal contents get equal
+   hashes.  Never returns -1, which those callers use as their
+   "not computed yet" / error marker. */
 Py_hash_t
+_Py_HashBytes(unsigned char *p, Py_ssize_t len)
+{
+    Py_uhash_t x;
+    Py_ssize_t i;
+
+    /* Do not read *p for an empty buffer: the pointer may not be
+       dereferenceable, and seeding from whatever byte follows a
+       zero-length slice would make equal empty buffers hash differently.
+       0 is the value the seed-and-xor below yields whenever *p is 0. */
+    if (len == 0)
+        return 0;
+    x = (Py_uhash_t) *p << 7;
+    for (i = 0; i < len; i++)
+        x = (1000003U * x) ^ (Py_uhash_t) *p++;
+    x ^= (Py_uhash_t) len;
+    if (x == (Py_uhash_t) -1)
+        x = (Py_uhash_t) -2;
+    return x;
+}
+
+Py_hash_t
 PyObject_HashNotImplemented(PyObject *v)
 {
     PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'",
-- 
cgit v0.12