diff options
author | Steve Dower <steve.dower@python.org> | 2024-04-05 14:21:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-05 14:21:16 (GMT) |
commit | 687616877ba540a44f82ff764b5f13d36c0f3910 (patch) | |
tree | 7e86b38ef111ccbe4725b10fb4e883d40fd558c4 | |
parent | abfa16b44bb9426312613893b6e193b02ee0304f (diff) | |
download | cpython-687616877ba540a44f82ff764b5f13d36c0f3910.zip cpython-687616877ba540a44f82ff764b5f13d36c0f3910.tar.gz cpython-687616877ba540a44f82ff764b5f13d36c0f3910.tar.bz2 |
gh-111140: PyLong_From/AsNativeBytes: Take *flags* rather than just *endianness* (GH-116053)
-rw-r--r-- | Doc/c-api/long.rst | 142 | ||||
-rw-r--r-- | Include/cpython/longobject.h | 24 | ||||
-rw-r--r-- | Lib/test/test_capi/test_long.py | 118 | ||||
-rw-r--r-- | Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst | 3 | ||||
-rw-r--r-- | Modules/_testcapi/long.c | 14 | ||||
-rw-r--r-- | Objects/longobject.c | 106 |
6 files changed, 319 insertions, 88 deletions
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 6a7eba7..1eb8f19 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,24 +113,28 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. -.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int flags) Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as a two's-complement signed number. - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or else ``0`` for big endian and ``1`` for little. + *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select + the native endian that CPython was compiled with and assume that the + most-significant bit is a sign bit. Passing + ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` will produce the same result as calling + :c:func:`PyLong_FromUnsignedNativeBytes`. Other flags are ignored. .. versionadded:: 3.13 -.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int flags) Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as an unsigned number. - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or else ``0`` for big endian and ``1`` for little. + *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select + the native endian that CPython was compiled with and assume that the + most-significant bit is not a sign bit. Flags other than endian are ignored. .. versionadded:: 3.13 @@ -354,14 +358,41 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. -.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness) +.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int flags) - Copy the Python integer value to a native *buffer* of size *n_bytes*:: + Copy the Python integer value *pylong* to a native *buffer* of size + *n_bytes*. The *flags* can be set to ``-1`` to behave similarly to a C cast, + or to values documented below to control the behavior. + + Returns ``-1`` with an exception raised on error. This may happen if + *pylong* cannot be interpreted as an integer, or if *pylong* was negative + and the ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` flag was set. + + Otherwise, returns the number of bytes required to store the value. + If this is equal to or less than *n_bytes*, the entire value was copied. + All *n_bytes* of the buffer are written: large buffers are padded with + zeroes. + + If the returned value is greater than than *n_bytes*, the value was + truncated: as many of the lowest bits of the value as could fit are written, + and the higher bits are ignored. This matches the typical behavior + of a C-style downcast. + + .. note:: + + Overflow is not considered an error. If the returned value + is larger than *n_bytes*, most significant bits were discarded. + + ``0`` will never be returned. + + Values are always copied as two's-complement. + + Usage example:: int32_t value; Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1); if (bytes < 0) { - // A Python exception was set with the reason. + // Failed. A Python exception was set with the reason. return NULL; } else if (bytes <= (Py_ssize_t)sizeof(value)) { @@ -372,19 +403,24 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // lowest bits of pylong. } - The above example may look *similar* to - :c:func:`PyLong_As* <PyLong_AsSize_t>` - but instead fills in a specific caller defined type and never raises an - error about of the :class:`int` *pylong*'s value regardless of *n_bytes* - or the returned byte count. + Passing zero to *n_bytes* will return the size of a buffer that would + be large enough to hold the value. This may be larger than technically + necessary, but not unreasonably so. - To get at the entire potentially big Python value, this can be used to - reserve enough space and copy it:: + .. note:: + + Passing *n_bytes=0* to this function is not an accurate way to determine + the bit length of a value. + + If *n_bytes=0*, *buffer* may be ``NULL``. + + To get at the entire Python value of an unknown size, the function can be + called twice: first to determine the buffer size, then to fill it:: // Ask how much space we need. Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1); if (expected < 0) { - // A Python exception was set with the reason. + // Failed. A Python exception was set with the reason. return NULL; } assert(expected != 0); // Impossible per the API definition. @@ -395,11 +431,11 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. } // Safely get the entire value. Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1); - if (bytes < 0) { // Exception set. + if (bytes < 0) { // Exception has been set. free(bignum); return NULL; } - else if (bytes > expected) { // Be safe, should not be possible. + else if (bytes > expected) { // This should not be possible. PyErr_SetString(PyExc_RuntimeError, "Unexpected bignum truncation after a size check."); free(bignum); @@ -409,35 +445,51 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // ... use bignum ... free(bignum); - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or ``0`` for big endian and ``1`` for little. - - Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as - an integer. Otherwise, return the size of the buffer required to store the - value. If this is equal to or less than *n_bytes*, the entire value was - copied. ``0`` will never be returned. - - Unless an exception is raised, all *n_bytes* of the buffer will always be - written. In the case of truncation, as many of the lowest bits of the value - as could fit are written. This allows the caller to ignore all non-negative - results if the intent is to match the typical behavior of a C-style - downcast. No exception is set on truncation. - - Values are always copied as two's-complement and sufficient buffer will be - requested to include a sign bit. For example, this may cause an value that - fits into 8 bytes when treated as unsigned to request 9 bytes, even though - all eight bytes were copied into the buffer. What has been omitted is the - zero sign bit -- redundant if the caller's intention is to treat the value - as unsigned. - - Passing zero to *n_bytes* will return the size of a buffer that would - be large enough to hold the value. This may be larger than technically - necessary, but not unreasonably so. + *flags* is either ``-1`` (``Py_ASNATIVEBYTES_DEFAULTS``) to select defaults + that behave most like a C cast, or a combintation of the other flags in + the table below. + Note that ``-1`` cannot be combined with other flags. + + Currently, ``-1`` corresponds to + ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``. + + ============================================= ====== + Flag Value + ============================================= ====== + .. c:macro:: Py_ASNATIVEBYTES_DEFAULTS ``-1`` + .. c:macro:: Py_ASNATIVEBYTES_BIG_ENDIAN ``0`` + .. c:macro:: Py_ASNATIVEBYTES_LITTLE_ENDIAN ``1`` + .. c:macro:: Py_ASNATIVEBYTES_NATIVE_ENDIAN ``3`` + .. c:macro:: Py_ASNATIVEBYTES_UNSIGNED_BUFFER ``4`` + .. c:macro:: Py_ASNATIVEBYTES_REJECT_NEGATIVE ``8`` + ============================================= ====== + + Specifying ``Py_ASNATIVEBYTES_NATIVE_ENDIAN`` will override any other endian + flags. Passing ``2`` is reserved. + + By default, sufficient buffer will be requested to include a sign bit. + For example, when converting 128 with *n_bytes=1*, the function will return + 2 (or more) in order to store a zero sign bit. + + If ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` is specified, a zero sign bit + will be omitted from size calculations. This allows, for example, 128 to fit + in a single-byte buffer. If the destination buffer is later treated as + signed, a positive input value may become negative. + Note that the flag does not affect handling of negative values: for those, + space for a sign bit is always requested. + + Specifying ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` causes an exception to be set + if *pylong* is negative. Without this flag, negative values will be copied + provided there is enough space for at least one sign bit, regardless of + whether ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` was specified. .. note:: - Passing *n_bytes=0* to this function is not an accurate way to determine - the bit length of a value. + With the default *flags* (``-1``, or *UNSIGNED_BUFFER* without + *REJECT_NEGATIVE*), multiple Python integers can map to a single value + without overflow. For example, both ``255`` and ``-1`` fit a single-byte + buffer and set all its bits. + This matches typical C cast behavior. .. versionadded:: 3.13 diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 07251db..189229e 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,11 +4,24 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); +#define Py_ASNATIVEBYTES_DEFAULTS -1 +#define Py_ASNATIVEBYTES_BIG_ENDIAN 0 +#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1 +#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3 +#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4 +#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8 + /* PyLong_AsNativeBytes: Copy the integer value to a native variable. buffer points to the first byte of the variable. n_bytes is the number of bytes available in the buffer. Pass 0 to request the required size for the value. - endianness is -1 for native endian, 0 for big endian or 1 for little. + flags is a bitfield of the following flags: + * 1 - little endian + * 2 - native endian + * 4 - unsigned destination (e.g. don't reject copying 255 into one byte) + * 8 - raise an exception for negative inputs + If flags is -1 (all bits set), native endian is used and value truncation + behaves most like C (allows negative inputs and allow MSB set). Big endian mode will write the most significant byte into the address directly referenced by buffer; little endian will write the least significant byte into that address. @@ -24,19 +37,20 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); calculate the bit length of an integer object. */ PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, - Py_ssize_t n_bytes, int endianness); + Py_ssize_t n_bytes, int flags); /* PyLong_FromNativeBytes: Create an int value from a native integer n_bytes is the number of bytes to read from the buffer. Passing 0 will always produce the zero int. PyLong_FromUnsignedNativeBytes always produces a non-negative int. - endianness is -1 for native endian, 0 for big endian or 1 for little. + flags is the same as for PyLong_AsNativeBytes, but only supports selecting + the endianness or forcing an unsigned buffer. Returns the int object, or NULL with an exception set. */ PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, - int endianness); + int flags); PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, - size_t n_bytes, int endianness); + size_t n_bytes, int flags); PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index d214015..83f894e 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -483,8 +483,12 @@ class LongTests(unittest.TestCase): (-MAX_USIZE, SZ + 1), (2**255-1, 32), (-(2**255-1), 32), + (2**255, 33), + (-(2**255), 33), # if you ask, we'll say 33, but 32 would do (2**256-1, 33), (-(2**256-1), 33), + (2**256, 33), + (-(2**256), 33), ]: with self.subTest(f"sizeof-{v:X}"): buffer = bytearray(b"\x5a") @@ -523,15 +527,17 @@ class LongTests(unittest.TestCase): (-1, b'\xff' * 10, min(11, SZ)), (-42, b'\xd6', 1), (-42, b'\xff' * 10 + b'\xd6', min(11, SZ)), - # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t) - (255, b'\xff', SZ), + # Extracts 255 into a single byte, but requests 2 + # (this is currently a special case, and "should" request SZ) + (255, b'\xff', 2), (255, b'\x00\xff', 2), (256, b'\x01\x00', 2), + (0x80, b'\x00' * 7 + b'\x80', min(8, SZ)), # Extracts successfully (unsigned), but requests 9 bytes (2**63, b'\x80' + b'\x00' * 7, 9), - # "Extracts", but requests 9 bytes - (-2**63, b'\x80' + b'\x00' * 7, 9), (2**63, b'\x00\x80' + b'\x00' * 7, 9), + # Extracts into 8 bytes, but if you provide 9 we'll say 9 + (-2**63, b'\x80' + b'\x00' * 7, 8), (-2**63, b'\xff\x80' + b'\x00' * 7, 9), (2**255-1, b'\x7f' + b'\xff' * 31, 32), @@ -548,10 +554,15 @@ class LongTests(unittest.TestCase): (-(2**256-1), b'\x00' * 31 + b'\x01', 33), (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + # However, -2**255 precisely will extract into 32 bytes and return + # success. For bigger buffers, it will still succeed, but will + # return 33 + (-(2**255), b'\x80' + b'\x00' * 31, 32), + (-(2**255), b'\xff\x80' + b'\x00' * 31, 33), # The classic "Windows HRESULT as negative number" case # HRESULT hr; - # PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT)) + # PyLong_AsNativeBytes(<-2147467259>, &hr, sizeof(HRESULT), -1) # assert(hr == E_FAIL) (-2147467259, b'\x80\x00\x40\x05', 4), ]: @@ -569,14 +580,102 @@ class LongTests(unittest.TestCase): f"PyLong_AsNativeBytes(v, buffer, {n}, <little>)") self.assertEqual(expect_le, buffer[:n], "<little>") + # Test cases that do not request size for a sign bit when we pass the + # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag + for v, expect_be, expect_n in [ + (255, b'\xff', 1), + # We pass a 2 byte buffer so it just uses the whole thing + (255, b'\x00\xff', 2), + + (2**63, b'\x80' + b'\x00' * 7, 8), + # We pass a 9 byte buffer so it uses the whole thing + (2**63, b'\x00\x80' + b'\x00' * 7, 9), + + (2**256-1, b'\xff' * 32, 32), + # We pass a 33 byte buffer so it uses the whole thing + (2**256-1, b'\x00' + b'\xff' * 32, 33), + ]: + with self.subTest(f"{v:X}-{len(expect_be)}bytes-unsigned"): + n = len(expect_be) + buffer = bytearray(b"\xa5"*n) + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 4), + f"PyLong_AsNativeBytes(v, buffer, {n}, <big|unsigned>)") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 5), + f"PyLong_AsNativeBytes(v, buffer, {n}, <little|unsigned>)") + + # Ensure Py_ASNATIVEBYTES_REJECT_NEGATIVE raises on negative value + with self.assertRaises(ValueError): + asnativebytes(-1, buffer, 0, 8) + # Check a few error conditions. These are validated in code, but are # unspecified in docs, so if we make changes to the implementation, it's # fine to just update these tests rather than preserve the behaviour. - with self.assertRaises(SystemError): - asnativebytes(1, buffer, 0, 2) with self.assertRaises(TypeError): asnativebytes('not a number', buffer, 0, -1) + def test_long_asnativebytes_fuzz(self): + import math + from random import Random + from _testcapi import ( + pylong_asnativebytes as asnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + + rng = Random() + # Allocate bigger buffer than actual values are going to be + buffer = bytearray(260) + + for _ in range(1000): + n = rng.randrange(1, 256) + bytes_be = bytes([ + # Ensure the most significant byte is nonzero + rng.randrange(1, 256), + *[rng.randrange(256) for _ in range(n - 1)] + ]) + bytes_le = bytes_be[::-1] + v = int.from_bytes(bytes_le, 'little') + + expect_1 = expect_2 = (SZ, n) + if bytes_be[0] & 0x80: + # All values are positive, so if MSB is set, expect extra bit + # when we request the size or have a large enough buffer + expect_1 = (SZ, n + 1) + # When passing Py_ASNATIVEBYTES_UNSIGNED_BUFFER, we expect the + # return to be exactly the right size. + expect_2 = (n,) + + try: + actual = asnativebytes(v, buffer, 0, -1) + self.assertIn(actual, expect_1) + + actual = asnativebytes(v, buffer, len(buffer), 0) + self.assertIn(actual, expect_1) + self.assertEqual(bytes_be, buffer[-n:]) + + actual = asnativebytes(v, buffer, len(buffer), 1) + self.assertIn(actual, expect_1) + self.assertEqual(bytes_le, buffer[:n]) + + actual = asnativebytes(v, buffer, n, 4) + self.assertIn(actual, expect_2, bytes_be.hex()) + actual = asnativebytes(v, buffer, n, 5) + self.assertIn(actual, expect_2, bytes_be.hex()) + except AssertionError as ex: + value_hex = ''.join(reversed([ + f'{b:02X}{"" if i % 8 else "_"}' + for i, b in enumerate(bytes_le, start=1) + ])).strip('_') + if support.verbose: + print() + print(n, 'bytes') + print('hex =', value_hex) + print('int =', v) + raise + raise AssertionError(f"Value: 0x{value_hex}") from ex + def test_long_fromnativebytes(self): import math from _testcapi import ( @@ -617,6 +716,11 @@ class LongTests(unittest.TestCase): self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <native>)") + # Swap the unsigned request for tests and use the + # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag instead + self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1), + f"PyLong_FromNativeBytes(buffer, {n}, <big|unsigned>)") + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst new file mode 100644 index 0000000..113db93 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst @@ -0,0 +1,3 @@ +Add additional flags to :c:func:`PyLong_AsNativeBytes` and +:c:func:`PyLong_FromNativeBytes` to allow the caller to determine how to handle +edge cases around values that fill the entire buffer. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 28dca01..769c390 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -52,8 +52,8 @@ pylong_asnativebytes(PyObject *module, PyObject *args) { PyObject *v; Py_buffer buffer; - Py_ssize_t n, endianness; - if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) { + Py_ssize_t n, flags; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &flags)) { return NULL; } if (buffer.readonly) { @@ -66,7 +66,7 @@ pylong_asnativebytes(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); + Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)flags); PyBuffer_Release(&buffer); return res >= 0 ? PyLong_FromSsize_t(res) : NULL; } @@ -76,8 +76,8 @@ static PyObject * pylong_fromnativebytes(PyObject *module, PyObject *args) { Py_buffer buffer; - Py_ssize_t n, endianness, signed_; - if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) { + Py_ssize_t n, flags, signed_; + if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &flags, &signed_)) { return NULL; } if (buffer.len < n) { @@ -86,8 +86,8 @@ pylong_fromnativebytes(PyObject *module, PyObject *args) return NULL; } PyObject *res = signed_ - ? PyLong_FromNativeBytes(buffer.buf, n, (int)endianness) - : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness); + ? PyLong_FromNativeBytes(buffer.buf, n, (int)flags) + : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)flags); PyBuffer_Release(&buffer); return res; } diff --git a/Objects/longobject.c b/Objects/longobject.c index cc2fe11..c4ab064 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1083,18 +1083,17 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) static inline int _resolve_endianness(int *endianness) { - if (*endianness < 0) { + if (*endianness == -1 || (*endianness & 2)) { *endianness = PY_LITTLE_ENDIAN; + } else { + *endianness &= 1; } - if (*endianness != 0 && *endianness != 1) { - PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); - return -1; - } + assert(*endianness == 0 || *endianness == 1); return 0; } Py_ssize_t -PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) +PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) { PyLongObject *v; union { @@ -1109,7 +1108,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) return -1; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return -1; } @@ -1125,6 +1124,15 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) do_decref = 1; } + if ((flags != -1 && (flags & Py_ASNATIVEBYTES_REJECT_NEGATIVE)) + && _PyLong_IsNegative(v)) { + PyErr_SetString(PyExc_ValueError, "Cannot convert negative int"); + if (do_decref) { + Py_DECREF(v); + } + return -1; + } + if (_PyLong_IsCompact(v)) { res = 0; cv.v = _PyLong_CompactValue(v); @@ -1159,6 +1167,15 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) /* If we fit, return the requested number of bytes */ if (_fits_in_n_bits(cv.v, n * 8)) { res = n; + } else if (cv.v > 0 && _fits_in_n_bits(cv.v, n * 8 + 1)) { + /* Positive values with the MSB set do not require an + * additional bit when the caller's intent is to treat them + * as unsigned. */ + if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) { + res = n; + } else { + res = n + 1; + } } } else { @@ -1199,17 +1216,55 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0); } - // More efficient calculation for number of bytes required? + /* Calculates the number of bits required for the *absolute* value + * of v. This does not take sign into account, only magnitude. */ size_t nb = _PyLong_NumBits((PyObject *)v); - /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up - * multiples of 8 to the next byte, but we add an implied bit for - * the sign and it cancels out. */ - size_t n_needed = (nb / 8) + 1; - res = (Py_ssize_t)n_needed; - if ((size_t)res != n_needed) { - PyErr_SetString(PyExc_OverflowError, - "value too large to convert"); + if (nb == (size_t)-1) { res = -1; + } else { + /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up + * multiples of 8 to the next byte, but we add an implied bit for + * the sign and it cancels out. */ + res = (Py_ssize_t)(nb / 8) + 1; + } + + /* Two edge cases exist that are best handled after extracting the + * bits. These may result in us reporting overflow when the value + * actually fits. + */ + if (n > 0 && res == n + 1 && nb % 8 == 0) { + if (_PyLong_IsNegative(v)) { + /* Values of 0x80...00 from negative values that use every + * available bit in the buffer do not require an additional + * bit to store the sign. */ + int is_edge_case = 1; + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = 0; i < n && is_edge_case; ++i, ++b) { + if (i == 0) { + is_edge_case = (*b == (little_endian ? 0 : 0x80)); + } else if (i < n - 1) { + is_edge_case = (*b == 0); + } else { + is_edge_case = (*b == (little_endian ? 0x80 : 0)); + } + } + if (is_edge_case) { + res = n; + } + } + else { + /* Positive values with the MSB set do not require an + * additional bit when the caller's intent is to treat them + * as unsigned. */ + unsigned char *b = (unsigned char *)buffer; + if (b[little_endian ? n - 1 : 0] & 0x80) { + if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) { + res = n; + } else { + res = n + 1; + } + } + } } } @@ -1222,38 +1277,41 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) PyObject * -PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) +PyLong_FromNativeBytes(const void* buffer, size_t n, int flags) { if (!buffer) { PyErr_BadInternalCall(); return NULL; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return NULL; } - return _PyLong_FromByteArray((const unsigned char *)buffer, n, - little_endian, 1); + return _PyLong_FromByteArray( + (const unsigned char *)buffer, + n, + little_endian, + (flags == -1 || !(flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) ? 1 : 0 + ); } PyObject * -PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) +PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int flags) { if (!buffer) { PyErr_BadInternalCall(); return NULL; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return NULL; } - return _PyLong_FromByteArray((const unsigned char *)buffer, n, - little_endian, 0); + return _PyLong_FromByteArray((const unsigned char *)buffer, n, little_endian, 0); } |