From 06e1ab0a6b51c35e4637bb82c3aa18548b6412b0 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 25 Aug 2012 17:59:50 +1000 Subject: Close #15573: use value-based memoryview comparisons (patch by Stefan Krah) --- Doc/library/stdtypes.rst | 78 ++++++-- Doc/whatsnew/3.3.rst | 7 +- Lib/test/test_buffer.py | 498 ++++++++++++++++++++++++++++++++++++++++------- Misc/NEWS | 3 + Objects/memoryobject.c | 326 +++++++++++++++++++++++++++---- 5 files changed, 779 insertions(+), 133 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 8adbabe..34d6f89 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2405,6 +2405,52 @@ copying. :class:`memoryview` has several methods: + .. method:: __eq__(exporter) + + A memoryview and a :pep:`3118` exporter are equal if their shapes are + equivalent and if all corresponding values are equal when the operands' + respective format codes are interpreted using :mod:`struct` syntax. + + For the subset of :mod:`struct` format strings currently supported by + :meth:`tolist`, ``v`` and ``w`` are equal if ``v.tolist() == w.tolist()``:: + + >>> import array + >>> a = array.array('I', [1, 2, 3, 4, 5]) + >>> b = array.array('d', [1.0, 2.0, 3.0, 4.0, 5.0]) + >>> c = array.array('b', [5, 3, 1]) + >>> x = memoryview(a) + >>> y = memoryview(b) + >>> x == a == y == b + True + >>> x.tolist() == a.tolist() == y.tolist() == b.tolist() + True + >>> z = y[::-2] + >>> z == c + True + >>> z.tolist() == c.tolist() + True + + If either format string is not supported by the :mod:`struct` module, + then the objects will always compare as unequal (even if the format + strings and buffer contents are identical):: + + >>> from ctypes import BigEndianStructure, c_long + >>> class BEPoint(BigEndianStructure): + ... _fields_ = [("x", c_long), ("y", c_long)] + ... 
+ >>> point = BEPoint(100, 200) + >>> a = memoryview(point) + >>> b = memoryview(point) + >>> a == point + False + >>> a == b + False + + Note that, as with floating point numbers, ``v is w`` does *not* imply + ``v == w`` for memoryview objects. + + .. versionchanged:: 3.3 + .. method:: tobytes() Return the data in the buffer as a bytestring. This is equivalent to @@ -2417,7 +2463,9 @@ copying. b'abc' For non-contiguous arrays the result is equal to the flattened list - representation with all elements converted to bytes. + representation with all elements converted to bytes. :meth:`tobytes` + supports all format strings, including those that are not in + :mod:`struct` module syntax. .. method:: tolist() @@ -2431,6 +2479,9 @@ copying. >>> m.tolist() [1.1, 2.2, 3.3] + :meth:`tolist` is currently restricted to single character native formats + in :mod:`struct` module syntax. + .. method:: release() Release the underlying buffer exposed by the memoryview object. Many @@ -2470,7 +2521,10 @@ copying. ``[byte_length//new_itemsize]``, which means that the result view will be one-dimensional. The return value is a new memoryview, but the buffer itself is not copied. Supported casts are 1D -> C-contiguous - and C-contiguous -> 1D. One of the formats must be a byte format + and C-contiguous -> 1D. + + Both formats are restricted to single element native formats in + :mod:`struct` syntax. One of the formats must be a byte format ('B', 'b' or 'c'). The byte length of the result must be the same as the original length. @@ -2608,25 +2662,7 @@ copying. A string containing the format (in :mod:`struct` module style) for each element in the view. A memoryview can be created from exporters with arbitrary format strings, but some methods (e.g. :meth:`tolist`) are - restricted to native single element formats. Special care must be taken - when comparing memoryviews. 
Since comparisons are required to return a - value for ``==`` and ``!=``, two memoryviews referencing the same - exporter can compare as not-equal if the exporter's format is not - understood:: - - >>> from ctypes import BigEndianStructure, c_long - >>> class BEPoint(BigEndianStructure): - ... _fields_ = [("x", c_long), ("y", c_long)] - ... - >>> point = BEPoint(100, 200) - >>> a = memoryview(point) - >>> b = memoryview(point) - >>> a == b - False - >>> a.tolist() - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - NotImplementedError: memoryview: unsupported format T{>l:x:>l:y:} + restricted to native single element formats. .. attribute:: itemsize diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 968e788..27bf12d 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -162,7 +162,6 @@ Features and the view is read-only. (Contributed by Antoine Pitrou in :issue:`13411`) - * Arbitrary slicing of any 1-D arrays type is supported. For example, it is now possible to reverse a memoryview in O(1) by using a negative step. @@ -178,6 +177,12 @@ API changes now returns an integer (in accordance with the struct module syntax). For returning a bytes object the view must be cast to 'c' first. +* memoryview comparisons now use the logical structure of the operands + and compare all array elements by value. All format strings in struct + module syntax are supported. Views with unrecognised format strings + are still permitted, but will always compare as unequal, regardless + of view contents. + * For further changes see `Build and C API Changes`_ and `Porting C code`_ . ..
_pep-393: diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index fb85dae..26cd9be 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -32,6 +32,11 @@ except ImportError: struct = None try: + import ctypes +except ImportError: + ctypes = None + +try: with warnings.catch_warnings(): from numpy import ndarray as numpy_array except ImportError: @@ -835,8 +840,6 @@ class TestBufferProtocol(unittest.TestCase): # test tobytes() self.assertEqual(result.tobytes(), b) - if not buf_err and is_memoryview_format(fmt): - # lst := expected multi-dimensional logical representation # flatten(lst) := elements in C-order ff = fmt if fmt else 'B' @@ -877,8 +880,10 @@ class TestBufferProtocol(unittest.TestCase): # To 'C' contig = py_buffer_to_contiguous(result, 'C', PyBUF_FULL_RO) self.assertEqual(len(contig), nmemb * itemsize) - initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + initlst = [struct.unpack_from(fmt, contig, n*itemsize) for n in range(nmemb)] + if len(initlst[0]) == 1: + initlst = [v[0] for v in initlst] y = ndarray(initlst, shape=shape, flags=ro, format=fmt) self.assertEqual(memoryview(y), memoryview(result)) @@ -886,8 +891,10 @@ class TestBufferProtocol(unittest.TestCase): # To 'F' contig = py_buffer_to_contiguous(result, 'F', PyBUF_FULL_RO) self.assertEqual(len(contig), nmemb * itemsize) - initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + initlst = [struct.unpack_from(fmt, contig, n*itemsize) for n in range(nmemb)] + if len(initlst[0]) == 1: + initlst = [v[0] for v in initlst] y = ndarray(initlst, shape=shape, flags=ro|ND_FORTRAN, format=fmt) @@ -896,8 +903,10 @@ class TestBufferProtocol(unittest.TestCase): # To 'A' contig = py_buffer_to_contiguous(result, 'A', PyBUF_FULL_RO) self.assertEqual(len(contig), nmemb * itemsize) - initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + initlst = [struct.unpack_from(fmt, contig, n*itemsize) for n in range(nmemb)] + if len(initlst[0]) == 1: + initlst = [v[0] for v in 
initlst] f = ND_FORTRAN if is_contiguous(result, 'F') else 0 y = ndarray(initlst, shape=shape, flags=f|ro, format=fmt) @@ -3025,7 +3034,7 @@ class TestBufferProtocol(unittest.TestCase): self.assertEqual(m.tobytes(), a.tobytes()) cmptest(self, a, b, m, singleitem) - def test_memoryview_compare(self): + def test_memoryview_compare_special_cases(self): a = array.array('L', [1, 2, 3]) b = array.array('L', [1, 2, 7]) @@ -3054,43 +3063,32 @@ class TestBufferProtocol(unittest.TestCase): v = memoryview(a) self.assertNotEqual(v, [1, 2, 3]) - # Different formats: - c = array.array('l', [1, 2, 3]) - v = memoryview(a) - self.assertNotEqual(v, c) - self.assertNotEqual(c, v) + # NaNs + nd = ndarray([(0, 0)], shape=[1], format='l x d x', flags=ND_WRITABLE) + nd[0] = (-1, float('nan')) + self.assertNotEqual(memoryview(nd), nd) - # Not implemented formats. Ugly, but inevitable. This is the same as - # issue #2531: equality is also used for membership testing and must - # return a result. - a = ndarray([(1, 1.5), (2, 2.7)], shape=[2], format='ld') + # Depends on issue #15625: the struct module does not understand 'u'. + a = array.array('u', 'xyz') v = memoryview(a) - self.assertNotEqual(v, a) self.assertNotEqual(a, v) - - a = ndarray([b'12345'], shape=[1], format="s") - v = memoryview(a) self.assertNotEqual(v, a) - self.assertNotEqual(a, v) - nd = ndarray([(1,1,1), (2,2,2), (3,3,3)], shape=[3], format='iii') - v = memoryview(nd) - self.assertNotEqual(v, nd) - self.assertNotEqual(nd, v) - - # '@' prefix can be dropped: - nd1 = ndarray([1,2,3], shape=[3], format='@i') - nd2 = ndarray([1,2,3], shape=[3], format='i') - v = memoryview(nd1) - w = memoryview(nd2) - self.assertEqual(v, w) - self.assertEqual(w, v) - self.assertEqual(v, nd2) - self.assertEqual(nd2, v) - self.assertEqual(w, nd1) - self.assertEqual(nd1, w) + # Some ctypes format strings are unknown to the struct module. 
+ if ctypes: + # format: "T{>l:x:>l:y:}" + class BEPoint(ctypes.BigEndianStructure): + _fields_ = [("x", ctypes.c_long), ("y", ctypes.c_long)] + point = BEPoint(100, 200) + a = memoryview(point) + b = memoryview(point) + self.assertNotEqual(a, b) + self.assertNotEqual(a, point) + self.assertNotEqual(point, a) + self.assertRaises(NotImplementedError, a.tolist) + + def test_memoryview_compare_ndim_zero(self): - # ndim = 0 nd1 = ndarray(1729, shape=[], format='@L') nd2 = ndarray(1729, shape=[], format='L', flags=ND_WRITABLE) v = memoryview(nd1) @@ -3124,7 +3122,37 @@ class TestBufferProtocol(unittest.TestCase): m[9] = 100 self.assertNotEqual(m, nd) - # ndim = 1: contiguous + # struct module: equal + nd1 = ndarray((1729, 1.2, b'12345'), shape=[], format='Lf5s') + nd2 = ndarray((1729, 1.2, b'12345'), shape=[], format='hf5s', + flags=ND_WRITABLE) + v = memoryview(nd1) + w = memoryview(nd2) + self.assertEqual(v, w) + self.assertEqual(w, v) + self.assertEqual(v, nd2) + self.assertEqual(nd2, v) + self.assertEqual(w, nd1) + self.assertEqual(nd1, w) + + # struct module: not equal + nd1 = ndarray((1729, 1.2, b'12345'), shape=[], format='Lf5s') + nd2 = ndarray((-1729, 1.2, b'12345'), shape=[], format='hf5s', + flags=ND_WRITABLE) + v = memoryview(nd1) + w = memoryview(nd2) + self.assertNotEqual(v, w) + self.assertNotEqual(w, v) + self.assertNotEqual(v, nd2) + self.assertNotEqual(nd2, v) + self.assertNotEqual(w, nd1) + self.assertNotEqual(nd1, w) + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + + def test_memoryview_compare_ndim_one(self): + + # contiguous nd1 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='@h') nd2 = ndarray([-529, 576, -625, 676, 729], shape=[5], format='@h') v = memoryview(nd1) @@ -3136,7 +3164,19 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # ndim = 1: non-contiguous + # contiguous, struct module + nd1 = ndarray([-529, 576, -625, 676, -729], shape=[5], format=' 1: C-contiguous - # 
different values + # random formats + n = 10 + for _ in range(100): + fmt, items, singleitem = randitems(n) + for flags in (0, ND_PIL): + nd = ndarray(items, shape=[n], format=fmt, flags=flags) + m = memoryview(nd) + self.assertEqual(m, nd) + + nd = nd[::-3] + m = memoryview(nd) + self.assertEqual(m, nd) + + def test_memoryview_compare_multidim_c(self): + + # C-contiguous, different values nd1 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='@h') nd2 = ndarray(list(range(0, 30)), shape=[3, 2, 5], format='@h') v = memoryview(nd1) @@ -3208,7 +3316,19 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # different shape + # C-contiguous, different values, struct module + nd1 = ndarray([(0, 1, 2)]*30, shape=[3, 2, 5], format='=f q xxL') + nd2 = ndarray([(-1.2, 1, 2)]*30, shape=[3, 2, 5], format='< f 2Q') + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertNotEqual(v, nd2) + self.assertNotEqual(w, nd1) + self.assertNotEqual(v, w) + + # C-contiguous, different shape nd1 = ndarray(list(range(30)), shape=[2, 3, 5], format='L') nd2 = ndarray(list(range(30)), shape=[3, 2, 5], format='L') v = memoryview(nd1) @@ -3220,9 +3340,9 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # different format - nd1 = ndarray(list(range(30)), shape=[2, 3, 5], format='L') - nd2 = ndarray(list(range(30)), shape=[2, 3, 5], format='l') + # C-contiguous, different shape, struct module + nd1 = ndarray([(0, 1, 2)]*21, shape=[3, 7], format='! 
b B xL') + nd2 = ndarray([(0, 1, 2)]*21, shape=[7, 3], format='= Qx l xxL') v = memoryview(nd1) w = memoryview(nd2) @@ -3232,8 +3352,21 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - ##### ndim > 1: Fortran contiguous - # different values + # C-contiguous, different format, struct module + nd1 = ndarray(list(range(30)), shape=[2, 3, 5], format='L') + nd2 = ndarray(list(range(30)), shape=[2, 3, 5], format='l') + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertEqual(v, nd2) + self.assertEqual(w, nd1) + self.assertEqual(v, w) + + def test_memoryview_compare_multidim_fortran(self): + + # Fortran-contiguous, different values nd1 = ndarray(list(range(-15, 15)), shape=[5, 2, 3], format='@h', flags=ND_FORTRAN) nd2 = ndarray(list(range(0, 30)), shape=[5, 2, 3], format='@h', @@ -3247,7 +3380,21 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # different shape + # Fortran-contiguous, different values, struct module + nd1 = ndarray([(2**64-1, -1)]*6, shape=[2, 3], format='=Qq', + flags=ND_FORTRAN) + nd2 = ndarray([(-1, 2**64-1)]*6, shape=[2, 3], format='=qQ', + flags=ND_FORTRAN) + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertNotEqual(v, nd2) + self.assertNotEqual(w, nd1) + self.assertNotEqual(v, w) + + # Fortran-contiguous, different shape nd1 = ndarray(list(range(-15, 15)), shape=[2, 3, 5], format='l', flags=ND_FORTRAN) nd2 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='l', @@ -3261,10 +3408,10 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # different format - nd1 = ndarray(list(range(30)), shape=[5, 2, 3], format='@h', + # Fortran-contiguous, different shape, struct module + nd1 = ndarray(list(range(-15, 15)), shape=[2, 3, 5], format='0ll', flags=ND_FORTRAN) - nd2 = 
ndarray(list(range(30)), shape=[5, 2, 3], format='@b', + nd2 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='l', flags=ND_FORTRAN) v = memoryview(nd1) w = memoryview(nd2) @@ -3275,7 +3422,23 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - ##### ndim > 1: mixed C/Fortran contiguous + # Fortran-contiguous, different format, struct module + nd1 = ndarray(list(range(30)), shape=[5, 2, 3], format='@h', + flags=ND_FORTRAN) + nd2 = ndarray(list(range(30)), shape=[5, 2, 3], format='@b', + flags=ND_FORTRAN) + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertEqual(v, nd2) + self.assertEqual(w, nd1) + self.assertEqual(v, w) + + def test_memoryview_compare_multidim_mixed(self): + + # mixed C/Fortran contiguous lst1 = list(range(-15, 15)) lst2 = transpose(lst1, [3, 2, 5]) nd1 = ndarray(lst1, shape=[3, 2, 5], format='@l') @@ -3287,8 +3450,20 @@ class TestBufferProtocol(unittest.TestCase): self.assertEqual(w, nd2) self.assertEqual(v, w) - ##### ndim > 1: non-contiguous - # different values + # mixed C/Fortran contiguous, struct module + lst1 = [(-3.3, -22, b'x')]*30 + lst1[5] = (-2.2, -22, b'x') + lst2 = transpose(lst1, [3, 2, 5]) + nd1 = ndarray(lst1, shape=[3, 2, 5], format='d b c') + nd2 = ndarray(lst2, shape=[3, 2, 5], format='d h c', flags=ND_FORTRAN) + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertEqual(v, w) + + # different values, non-contiguous ex1 = ndarray(list(range(40)), shape=[5, 8], format='@I') nd1 = ex1[3:1:-1, ::-2] ex2 = ndarray(list(range(40)), shape=[5, 8], format='I') @@ -3302,6 +3477,20 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) + # same values, non-contiguous, struct module + ex1 = ndarray([(2**31-1, -2**31)]*22, shape=[11, 2], format='=ii') + nd1 = ex1[3:1:-1, ::-2] + ex2 = ndarray([(2**31-1, -2**31)]*22, 
shape=[11, 2], format='>ii') + nd2 = ex2[1:3:1, ::-2] + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertEqual(v, nd2) + self.assertEqual(w, nd1) + self.assertEqual(v, w) + # different shape ex1 = ndarray(list(range(30)), shape=[2, 3, 5], format='b') nd1 = ex1[1:3:, ::-2] @@ -3316,10 +3505,10 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # different format - ex1 = ndarray(list(range(30)), shape=[5, 3, 2], format='i') + # different shape, struct module + ex1 = ndarray(list(range(30)), shape=[2, 3, 5], format='B') nd1 = ex1[1:3:, ::-2] - nd2 = ndarray(list(range(30)), shape=[5, 3, 2], format='@I') + nd2 = ndarray(list(range(30)), shape=[3, 2, 5], format='b') nd2 = ex2[1:3:, ::-2] v = memoryview(nd1) w = memoryview(nd2) @@ -3330,7 +3519,35 @@ class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - ##### ndim > 1: zeros in shape + # different format, struct module + ex1 = ndarray([(2, b'123')]*30, shape=[5, 3, 2], format='b3s') + nd1 = ex1[1:3:, ::-2] + nd2 = ndarray([(2, b'123')]*30, shape=[5, 3, 2], format='i3s') + nd2 = ex2[1:3:, ::-2] + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertNotEqual(v, nd2) + self.assertNotEqual(w, nd1) + self.assertNotEqual(v, w) + + def test_memoryview_compare_multidim_zero_shape(self): + + # zeros in shape + nd1 = ndarray(list(range(30)), shape=[0, 3, 2], format='i') + nd2 = ndarray(list(range(30)), shape=[5, 0, 2], format='@i') + v = memoryview(nd1) + w = memoryview(nd2) + + self.assertEqual(v, nd1) + self.assertEqual(w, nd2) + self.assertNotEqual(v, nd2) + self.assertNotEqual(w, nd1) + self.assertNotEqual(v, w) + + # zeros in shape, struct module nd1 = ndarray(list(range(30)), shape=[0, 3, 2], format='i') nd2 = ndarray(list(range(30)), shape=[5, 0, 2], format='@i') v = memoryview(nd1) @@ -3342,7 +3559,9 @@ 
class TestBufferProtocol(unittest.TestCase): self.assertNotEqual(w, nd1) self.assertNotEqual(v, w) - # ndim > 1: zero strides + def test_memoryview_compare_multidim_zero_strides(self): + + # zero strides nd1 = ndarray([900]*80, shape=[4, 5, 4], format='@L') nd2 = ndarray([900], shape=[4, 5, 4], strides=[0, 0, 0], format='L') v = memoryview(nd1) @@ -3355,7 +3574,21 @@ class TestBufferProtocol(unittest.TestCase): self.assertEqual(v, w) self.assertEqual(v.tolist(), w.tolist()) - ##### ndim > 1: suboffsets + # zero strides, struct module + nd1 = ndarray([(1, 2)]*10, shape=[2, 5], format='=lQ') + nd2 = ndarray([(1, 2)], shape=[2, 5], strides=[0, 0], format='', '!']: + x = ndarray([2**63]*120, shape=[3,5,2,2,2], format=byteorder+'Q') + y = ndarray([2**63]*120, shape=[3,5,2,2,2], format=byteorder+'Q', + flags=ND_WRITABLE|ND_FORTRAN) + y[2][3][1][1][1] = 1 + a = memoryview(x) + b = memoryview(y) + self.assertEqual(a, x) + self.assertEqual(b, y) + self.assertNotEqual(a, b) + self.assertNotEqual(a, y) + self.assertNotEqual(b, x) + + x = ndarray([(2**63, 2**31, 2**15)]*120, shape=[3,5,2,2,2], + format=byteorder+'QLH') + y = ndarray([(2**63, 2**31, 2**15)]*120, shape=[3,5,2,2,2], + format=byteorder+'QLH', flags=ND_WRITABLE|ND_FORTRAN) + y[2][3][1][1][1] = (1, 1, 1) + a = memoryview(x) + b = memoryview(y) + self.assertEqual(a, x) + self.assertEqual(b, y) + self.assertNotEqual(a, b) + self.assertNotEqual(a, y) + self.assertNotEqual(b, x) + def test_memoryview_check_released(self): a = array.array('d', [1.1, 2.2, 3.3]) @@ -3452,11 +3787,38 @@ class TestBufferProtocol(unittest.TestCase): def test_memoryview_tobytes(self): # Many implicit tests are already in self.verify(). 
- nd = ndarray([-529, 576, -625, 676, -729], shape=[5], format='@h') + t = (-529, 576, -625, 676, -729) + nd = ndarray(t, shape=[5], format='@h') m = memoryview(nd) + self.assertEqual(m, nd) self.assertEqual(m.tobytes(), nd.tobytes()) + nd = ndarray([t], shape=[1], format='>hQiLl') + m = memoryview(nd) + self.assertEqual(m, nd) + self.assertEqual(m.tobytes(), nd.tobytes()) + + nd = ndarray([t for _ in range(12)], shape=[2,2,3], format='=hQiLl') + m = memoryview(nd) + self.assertEqual(m, nd) + self.assertEqual(m.tobytes(), nd.tobytes()) + + nd = ndarray([t for _ in range(120)], shape=[5,2,2,3,2], + format='l:x:>l:y:}" + class BEPoint(ctypes.BigEndianStructure): + _fields_ = [("x", ctypes.c_long), ("y", ctypes.c_long)] + point = BEPoint(100, 200) + a = memoryview(point) + self.assertEqual(a.tobytes(), bytes(point)) + def test_memoryview_get_contiguous(self): # Many implicit tests are already in self.verify(). diff --git a/Misc/NEWS b/Misc/NEWS index 98f3f07..83b3cb7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.3.0 Release Candidate 1? Core and Builtins ----------------- +- Issue #15573: memoryview comparisons are now performed by value with full + support for any valid struct module format definition. + - Issue #15316: When an item in the fromlist for __import__ doesn't exist, don't raise an error, but if an exception is raised as part of an import do let that propagate. 
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 46a8416..f547983 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -246,7 +246,7 @@ Create a new memoryview object which references the given object."); (view->suboffsets && view->suboffsets[dest->ndim-1] >= 0) Py_LOCAL_INLINE(int) -last_dim_is_contiguous(Py_buffer *dest, Py_buffer *src) +last_dim_is_contiguous(const Py_buffer *dest, const Py_buffer *src) { assert(dest->ndim > 0 && src->ndim > 0); return (!HAVE_SUBOFFSETS_IN_LAST_DIM(dest) && @@ -255,37 +255,63 @@ last_dim_is_contiguous(Py_buffer *dest, Py_buffer *src) src->strides[src->ndim-1] == src->itemsize); } -/* Check that the logical structure of the destination and source buffers - is identical. */ -static int -cmp_structure(Py_buffer *dest, Py_buffer *src) +/* This is not a general function for determining format equivalence. + It is used in copy_single() and copy_buffer() to weed out non-matching + formats. Skipping the '@' character is specifically used in slice + assignments, where the lvalue is already known to have a single character + format. This is a performance hack that could be rewritten (if properly + benchmarked). */ +Py_LOCAL_INLINE(int) +equiv_format(const Py_buffer *dest, const Py_buffer *src) { const char *dfmt, *sfmt; - int i; assert(dest->format && src->format); dfmt = dest->format[0] == '@' ? dest->format+1 : dest->format; sfmt = src->format[0] == '@' ? src->format+1 : src->format; if (strcmp(dfmt, sfmt) != 0 || - dest->itemsize != src->itemsize || - dest->ndim != src->ndim) { - goto value_error; + dest->itemsize != src->itemsize) { + return 0; } + return 1; +} + +/* Two shapes are equivalent if they are either equal or identical up + to a zero element at the same position. For example, in NumPy arrays + the shapes [1, 0, 5] and [1, 0, 7] are equivalent. 
*/ +Py_LOCAL_INLINE(int) +equiv_shape(const Py_buffer *dest, const Py_buffer *src) +{ + int i; + + if (dest->ndim != src->ndim) + return 0; + for (i = 0; i < dest->ndim; i++) { if (dest->shape[i] != src->shape[i]) - goto value_error; + return 0; if (dest->shape[i] == 0) break; } - return 0; + return 1; +} -value_error: - PyErr_SetString(PyExc_ValueError, - "ndarray assignment: lvalue and rvalue have different structures"); - return -1; +/* Check that the logical structure of the destination and source buffers + is identical. */ +static int +equiv_structure(const Py_buffer *dest, const Py_buffer *src) +{ + if (!equiv_format(dest, src) || + !equiv_shape(dest, src)) { + PyErr_SetString(PyExc_ValueError, + "ndarray assignment: lvalue and rvalue have different structures"); + return 0; + } + + return 1; } /* Base case for recursive multi-dimensional copying. Contiguous arrays are @@ -358,7 +384,7 @@ copy_single(Py_buffer *dest, Py_buffer *src) assert(dest->ndim == 1); - if (cmp_structure(dest, src) < 0) + if (!equiv_structure(dest, src)) return -1; if (!last_dim_is_contiguous(dest, src)) { @@ -390,7 +416,7 @@ copy_buffer(Py_buffer *dest, Py_buffer *src) assert(dest->ndim > 0); - if (cmp_structure(dest, src) < 0) + if (!equiv_structure(dest, src)) return -1; if (!last_dim_is_contiguous(dest, src)) { @@ -1828,6 +1854,131 @@ err_format: /****************************************************************************/ +/* unpack using the struct module */ +/****************************************************************************/ + +/* For reasonable performance it is necessary to cache all objects required + for unpacking. An unpacker can handle the format passed to unpack_from(). + Invariant: All pointer fields of the struct should either be NULL or valid + pointers. 
*/ +struct unpacker { + PyObject *unpack_from; /* Struct.unpack_from(format) */ + PyObject *mview; /* cached memoryview */ + char *item; /* buffer for mview */ + Py_ssize_t itemsize; /* len(item) */ +}; + +static struct unpacker * +unpacker_new(void) +{ + struct unpacker *x = PyMem_Malloc(sizeof *x); + + if (x == NULL) { + PyErr_NoMemory(); + return NULL; + } + + x->unpack_from = NULL; + x->mview = NULL; + x->item = NULL; + x->itemsize = 0; + + return x; +} + +static void +unpacker_free(struct unpacker *x) +{ + if (x) { + Py_XDECREF(x->unpack_from); + Py_XDECREF(x->mview); + PyMem_Free(x->item); + PyMem_Free(x); + } +} + +/* Return a new unpacker for the given format. */ +static struct unpacker * +struct_get_unpacker(const char *fmt, Py_ssize_t itemsize) +{ + PyObject *structmodule; /* XXX cache these two */ + PyObject *Struct = NULL; /* XXX in globals? */ + PyObject *structobj = NULL; + PyObject *format = NULL; + struct unpacker *x = NULL; + + structmodule = PyImport_ImportModule("struct"); + if (structmodule == NULL) + return NULL; + + Struct = PyObject_GetAttrString(structmodule, "Struct"); + Py_DECREF(structmodule); + if (Struct == NULL) + return NULL; + + x = unpacker_new(); + if (x == NULL) + goto error; + + format = PyBytes_FromString(fmt); + if (format == NULL) + goto error; + + structobj = PyObject_CallFunctionObjArgs(Struct, format, NULL); + if (structobj == NULL) + goto error; + + x->unpack_from = PyObject_GetAttrString(structobj, "unpack_from"); + if (x->unpack_from == NULL) + goto error; + + x->item = PyMem_Malloc(itemsize); + if (x->item == NULL) { + PyErr_NoMemory(); + goto error; + } + x->itemsize = itemsize; + + x->mview = PyMemoryView_FromMemory(x->item, itemsize, PyBUF_WRITE); + if (x->mview == NULL) + goto error; + + +out: + Py_XDECREF(Struct); + Py_XDECREF(format); + Py_XDECREF(structobj); + return x; + +error: + unpacker_free(x); + x = NULL; + goto out; +} + +/* unpack a single item */ +static PyObject * +struct_unpack_single(const char *ptr, 
struct unpacker *x) +{ + PyObject *v; + + memcpy(x->item, ptr, x->itemsize); + v = PyObject_CallFunctionObjArgs(x->unpack_from, x->mview, NULL); + if (v == NULL) + return NULL; + + if (PyTuple_GET_SIZE(v) == 1) { + PyObject *tmp = PyTuple_GET_ITEM(v, 0); + Py_INCREF(tmp); + Py_DECREF(v); + return tmp; + } + + return v; +} + + +/****************************************************************************/ /* Representations */ /****************************************************************************/ @@ -2261,6 +2412,58 @@ static PySequenceMethods memory_as_sequence = { /* Comparisons */ /**************************************************************************/ +#define MV_COMPARE_EX -1 /* exception */ +#define MV_COMPARE_NOT_IMPL -2 /* not implemented */ + +/* Translate a StructError to "not equal". Preserve other exceptions. */ +static int +fix_struct_error_int(void) +{ + assert(PyErr_Occurred()); + /* XXX Cannot get at StructError directly? */ + if (PyErr_ExceptionMatches(PyExc_ImportError) || + PyErr_ExceptionMatches(PyExc_MemoryError)) { + return MV_COMPARE_EX; + } + /* StructError: invalid or unknown format -> not equal */ + PyErr_Clear(); + return 0; +} + +/* Unpack and compare single items of p and q using the struct module. */ +static int +struct_unpack_cmp(const char *p, const char *q, + struct unpacker *unpack_p, struct unpacker *unpack_q) +{ + PyObject *v, *w; + int ret; + + /* At this point any exception from the struct module should not be + StructError, since both formats have been accepted already. */ + v = struct_unpack_single(p, unpack_p); + if (v == NULL) + return MV_COMPARE_EX; + + w = struct_unpack_single(q, unpack_q); + if (w == NULL) { + Py_DECREF(v); + return MV_COMPARE_EX; + } + + /* MV_COMPARE_EX == -1: exceptions are preserved */ + ret = PyObject_RichCompareBool(v, w, Py_EQ); + Py_DECREF(v); + Py_DECREF(w); + + return ret; +} + +/* Unpack and compare single items of p and q. 
If both p and q have the same + single element native format, the comparison uses a fast path (gcc creates + a jump table and converts memcpy into simple assignments on x86/x64). + + Otherwise, the comparison is delegated to the struct module, which is + 30-60x slower. */ #define CMP_SINGLE(p, q, type) \ do { \ type x; \ @@ -2271,11 +2474,12 @@ static PySequenceMethods memory_as_sequence = { } while (0) Py_LOCAL_INLINE(int) -unpack_cmp(const char *p, const char *q, const char *fmt) +unpack_cmp(const char *p, const char *q, char fmt, + struct unpacker *unpack_p, struct unpacker *unpack_q) { int equal; - switch (fmt[0]) { + switch (fmt) { /* signed integers and fast path for 'B' */ case 'B': return *((unsigned char *)p) == *((unsigned char *)q); @@ -2317,9 +2521,17 @@ unpack_cmp(const char *p, const char *q, const char *fmt) /* pointer */ case 'P': CMP_SINGLE(p, q, void *); return equal; - /* Py_NotImplemented */ - default: return -1; + /* use the struct module */ + case '_': + assert(unpack_p); + assert(unpack_q); + return struct_unpack_cmp(p, q, unpack_p, unpack_q); } + + /* NOT REACHED */ + PyErr_SetString(PyExc_RuntimeError, + "memoryview: internal error in richcompare"); + return MV_COMPARE_EX; } /* Base case for recursive array comparisons. Assumption: ndim == 1. 
*/ @@ -2327,7 +2539,7 @@ static int cmp_base(const char *p, const char *q, const Py_ssize_t *shape, const Py_ssize_t *pstrides, const Py_ssize_t *psuboffsets, const Py_ssize_t *qstrides, const Py_ssize_t *qsuboffsets, - const char *fmt) + char fmt, struct unpacker *unpack_p, struct unpacker *unpack_q) { Py_ssize_t i; int equal; @@ -2335,7 +2547,7 @@ cmp_base(const char *p, const char *q, const Py_ssize_t *shape, for (i = 0; i < shape[0]; p+=pstrides[0], q+=qstrides[0], i++) { const char *xp = ADJUST_PTR(p, psuboffsets); const char *xq = ADJUST_PTR(q, qsuboffsets); - equal = unpack_cmp(xp, xq, fmt); + equal = unpack_cmp(xp, xq, fmt, unpack_p, unpack_q); if (equal <= 0) return equal; } @@ -2350,7 +2562,7 @@ cmp_rec(const char *p, const char *q, Py_ssize_t ndim, const Py_ssize_t *shape, const Py_ssize_t *pstrides, const Py_ssize_t *psuboffsets, const Py_ssize_t *qstrides, const Py_ssize_t *qsuboffsets, - const char *fmt) + char fmt, struct unpacker *unpack_p, struct unpacker *unpack_q) { Py_ssize_t i; int equal; @@ -2364,7 +2576,7 @@ cmp_rec(const char *p, const char *q, return cmp_base(p, q, shape, pstrides, psuboffsets, qstrides, qsuboffsets, - fmt); + fmt, unpack_p, unpack_q); } for (i = 0; i < shape[0]; p+=pstrides[0], q+=qstrides[0], i++) { @@ -2373,7 +2585,7 @@ cmp_rec(const char *p, const char *q, equal = cmp_rec(xp, xq, ndim-1, shape+1, pstrides+1, psuboffsets ? psuboffsets+1 : NULL, qstrides+1, qsuboffsets ? 
qsuboffsets+1 : NULL, - fmt); + fmt, unpack_p, unpack_q); if (equal <= 0) return equal; } @@ -2385,9 +2597,12 @@ static PyObject * memory_richcompare(PyObject *v, PyObject *w, int op) { PyObject *res; - Py_buffer wbuf, *vv, *ww = NULL; - const char *vfmt, *wfmt; - int equal = -1; /* Py_NotImplemented */ + Py_buffer wbuf, *vv; + Py_buffer *ww = NULL; + struct unpacker *unpack_v = NULL; + struct unpacker *unpack_w = NULL; + char vfmt, wfmt; + int equal = MV_COMPARE_NOT_IMPL; if (op != Py_EQ && op != Py_NE) goto result; /* Py_NotImplemented */ @@ -2414,38 +2629,59 @@ memory_richcompare(PyObject *v, PyObject *w, int op) ww = &wbuf; } - vfmt = adjust_fmt(vv); - wfmt = adjust_fmt(ww); - if (vfmt == NULL || wfmt == NULL) { - PyErr_Clear(); - goto result; /* Py_NotImplemented */ - } - - if (cmp_structure(vv, ww) < 0) { + if (!equiv_shape(vv, ww)) { PyErr_Clear(); equal = 0; goto result; } + /* Use fast unpacking for identical primitive C type formats. */ + if (get_native_fmtchar(&vfmt, vv->format) < 0) + vfmt = '_'; + if (get_native_fmtchar(&wfmt, ww->format) < 0) + wfmt = '_'; + if (vfmt == '_' || wfmt == '_' || vfmt != wfmt) { + /* Use struct module unpacking. NOTE: Even for equal format strings, + memcmp() cannot be used for item comparison since it would give + incorrect results in the case of NaNs or uninitialized padding + bytes. 
*/ + vfmt = '_'; + unpack_v = struct_get_unpacker(vv->format, vv->itemsize); + if (unpack_v == NULL) { + equal = fix_struct_error_int(); + goto result; + } + unpack_w = struct_get_unpacker(ww->format, ww->itemsize); + if (unpack_w == NULL) { + equal = fix_struct_error_int(); + goto result; + } + } + if (vv->ndim == 0) { - equal = unpack_cmp(vv->buf, ww->buf, vfmt); + equal = unpack_cmp(vv->buf, ww->buf, + vfmt, unpack_v, unpack_w); } else if (vv->ndim == 1) { equal = cmp_base(vv->buf, ww->buf, vv->shape, vv->strides, vv->suboffsets, ww->strides, ww->suboffsets, - vfmt); + vfmt, unpack_v, unpack_w); } else { equal = cmp_rec(vv->buf, ww->buf, vv->ndim, vv->shape, vv->strides, vv->suboffsets, ww->strides, ww->suboffsets, - vfmt); + vfmt, unpack_v, unpack_w); } result: - if (equal < 0) - res = Py_NotImplemented; + if (equal < 0) { + if (equal == MV_COMPARE_NOT_IMPL) + res = Py_NotImplemented; + else /* exception */ + res = NULL; + } else if ((equal && op == Py_EQ) || (!equal && op == Py_NE)) res = Py_True; else @@ -2453,7 +2689,11 @@ result: if (ww == &wbuf) PyBuffer_Release(ww); - Py_INCREF(res); + + unpacker_free(unpack_v); + unpacker_free(unpack_w); + + Py_XINCREF(res); return res; } -- cgit v0.12