diff options
-rw-r--r-- | Doc/library/binascii.rst | 22 | ||||
-rw-r--r-- | Doc/library/stdtypes.rst | 18 | ||||
-rw-r--r-- | Include/pystrhex.h | 3 | ||||
-rw-r--r-- | Lib/test/test_binascii.py | 12 | ||||
-rw-r--r-- | Lib/test/test_bytes.py | 57 | ||||
-rw-r--r-- | Lib/test/test_doctest.py | 5 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst | 4 | ||||
-rw-r--r-- | Modules/binascii.c | 33 | ||||
-rw-r--r-- | Modules/clinic/binascii.c.h | 115 | ||||
-rw-r--r-- | Objects/bytearrayobject.c | 34 | ||||
-rw-r--r-- | Objects/bytesobject.c | 34 | ||||
-rw-r--r-- | Objects/clinic/bytearrayobject.c.h | 71 | ||||
-rw-r--r-- | Objects/clinic/bytesobject.c.h | 71 | ||||
-rw-r--r-- | Objects/clinic/memoryobject.c.h | 74 | ||||
-rw-r--r-- | Objects/memoryobject.c | 45 | ||||
-rw-r--r-- | Python/pystrhex.c | 90 |
16 files changed, 624 insertions, 64 deletions
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 89ecddc..98d8679 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -145,8 +145,8 @@ The :mod:`binascii` module defines the following functions: platforms, use ``crc32(data) & 0xffffffff``. -.. function:: b2a_hex(data) - hexlify(data) +.. function:: b2a_hex(data[, sep[, bytes_per_sep=1]]) + hexlify(data[, sep[, bytes_per_sep=1]]) Return the hexadecimal representation of the binary *data*. Every byte of *data* is converted into the corresponding 2-digit hex representation. The @@ -155,6 +155,24 @@ The :mod:`binascii` module defines the following functions: Similar functionality (but returning a text string) is also conveniently accessible using the :meth:`bytes.hex` method. + If *sep* is specified, it must be a single character str or bytes object. + It will be inserted in the output after every *bytes_per_sep* input bytes. + Separator placement is counted from the right end of the output by default, + if you wish to count from the left, supply a negative *bytes_per_sep* value. + + >>> import binascii + >>> binascii.b2a_hex(b'\xb9\x01\xef') + b'b901ef' + >>> binascii.hexlify(b'\xb9\x01\xef', '-') + b'b9-01-ef' + >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2) + b'b9_01ef' + >>> binascii.b2a_hex(b'\xb9\x01\xef', b' ', -2) + b'b901 ef' + + .. versionchanged:: 3.8 + The *sep* and *bytes_per_sep* parameters were added. + .. function:: a2b_hex(hexstr) unhexlify(hexstr) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 293a1ab..fcb0da7 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2404,8 +2404,26 @@ data and are closely related to string objects in a variety of other ways. >>> b'\xf0\xf1\xf2'.hex() 'f0f1f2' + If you want to make the hex string easier to read, you can specify a + single character separator *sep* parameter to include in the output. + By default between each byte. A second optional *bytes_per_sep* + parameter controls the spacing. Positive values calculate the + separator position from the right, negative values from the left. + + >>> value = b'\xf0\xf1\xf2' + >>> value.hex('-') + 'f0-f1-f2' + >>> value.hex('_', 2) + 'f0_f1f2' + >>> b'UUDDLRLRAB'.hex(' ', -4) + '55554444 4c524c52 4142' + .. versionadded:: 3.5 + .. versionchanged:: 3.8 + :meth:`bytes.hex` now supports optional *sep* and *bytes_per_sep* + parameters to insert separators between bytes in the hex output. + Since bytes objects are sequences of integers (akin to a tuple), for a bytes object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes object of length 1. (This contrasts with text strings, where both indexing diff --git a/Include/pystrhex.h b/Include/pystrhex.h index 66a30e2..a4f3630 100644 --- a/Include/pystrhex.h +++ b/Include/pystrhex.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen); /* Returns a bytes() containing the ASCII hex representation of argbuf. */ PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen); +/* These variants include support for a separator between every N bytes: */ +PyAPI_FUNC(PyObject*) _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group); +PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group); #endif /* !Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 572e50c..08de5c9 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -240,6 +240,18 @@ class BinASCIITest(unittest.TestCase): self.assertEqual(binascii.hexlify(self.type2test(s)), t) self.assertEqual(binascii.unhexlify(self.type2test(t)), u) + def test_hex_separator(self): + """Test that hexlify and b2a_hex are binary versions of bytes.hex.""" + # Logic of separators is tested in test_bytes.py. This checks that + # arg parsing works and exercises the direct to bytes object code + # path within pystrhex.c. + s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' + self.assertEqual(binascii.hexlify(self.type2test(s)), s.hex().encode('ascii')) + expected8 = s.hex('.', 8).encode('ascii') + self.assertEqual(binascii.hexlify(self.type2test(s), '.', 8), expected8) + expected1 = s.hex(':').encode('ascii') + self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1) + def test_qp(self): type2test = self.type2test a2b_qp = binascii.a2b_qp diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 9502a8f..bbd45c7 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -417,6 +417,63 @@ class BaseBytesTest: self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30') self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30') + def test_hex_separator_basics(self): + three_bytes = self.type2test(b'\xb9\x01\xef') + self.assertEqual(three_bytes.hex(), 'b901ef') + with self.assertRaises(ValueError): + three_bytes.hex('') + with self.assertRaises(ValueError): + three_bytes.hex('xx') + self.assertEqual(three_bytes.hex(':', 0), 'b901ef') + with self.assertRaises(TypeError): + three_bytes.hex(None, 0) + with self.assertRaises(ValueError): + three_bytes.hex('\xff') + with self.assertRaises(ValueError): + three_bytes.hex(b'\xff') + with self.assertRaises(ValueError): + three_bytes.hex(b'\x80') + with self.assertRaises(ValueError): + three_bytes.hex(chr(0x100)) + self.assertEqual(three_bytes.hex(':', 0), 'b901ef') + self.assertEqual(three_bytes.hex(b'\x00'), 'b9\x0001\x00ef') + self.assertEqual(three_bytes.hex('\x00'), 'b9\x0001\x00ef') + self.assertEqual(three_bytes.hex(b'\x7f'), 'b9\x7f01\x7fef') + self.assertEqual(three_bytes.hex('\x7f'), 'b9\x7f01\x7fef') + self.assertEqual(three_bytes.hex(':', 3), 'b901ef') + self.assertEqual(three_bytes.hex(':', 4), 'b901ef') + self.assertEqual(three_bytes.hex(':', -4), 'b901ef') + self.assertEqual(three_bytes.hex(':'), 'b9:01:ef') + self.assertEqual(three_bytes.hex(b'$'), 'b9$01$ef') + self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') + self.assertEqual(three_bytes.hex(':', -1), 'b9:01:ef') + self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef') + self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') + self.assertEqual(three_bytes.hex('*', -2), 'b901*ef') + + value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' + self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030') + + def test_hex_separator_five_bytes(self): + five_bytes = self.type2test(range(90,95)) + self.assertEqual(five_bytes.hex(), '5a5b5c5d5e') + + def test_hex_separator_six_bytes(self): + six_bytes = self.type2test(x*3 for x in range(1, 7)) + self.assertEqual(six_bytes.hex(), '0306090c0f12') + self.assertEqual(six_bytes.hex('.', 1), '03.06.09.0c.0f.12') + self.assertEqual(six_bytes.hex(' ', 2), '0306 090c 0f12') + self.assertEqual(six_bytes.hex('-', 3), '030609-0c0f12') + self.assertEqual(six_bytes.hex(':', 4), '0306:090c0f12') + self.assertEqual(six_bytes.hex(':', 5), '03:06090c0f12') + self.assertEqual(six_bytes.hex(':', 6), '0306090c0f12') + self.assertEqual(six_bytes.hex(':', 95), '0306090c0f12') + self.assertEqual(six_bytes.hex('_', -3), '030609_0c0f12') + self.assertEqual(six_bytes.hex(':', -4), '0306090c:0f12') + self.assertEqual(six_bytes.hex(b'@', -5), '0306090c0f@12') + self.assertEqual(six_bytes.hex(':', -6), '0306090c0f12') + self.assertEqual(six_bytes.hex(' ', -95), '0306090c0f12') + def test_join(self): self.assertEqual(self.type2test(b"").join([]), b"") self.assertEqual(self.type2test(b"").join([b""]), b"") diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index f1013f2..5ea18f5 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -665,11 +665,13 @@ plain ol' Python and is guaranteed to be available. True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests - 9 + 12 >>> for t in real_tests: ... print('{} {}'.format(len(t.examples), t.name)) ... 1 builtins.bin + 5 builtins.bytearray.hex + 5 builtins.bytes.hex 3 builtins.float.as_integer_ratio 2 builtins.float.fromhex 2 builtins.float.hex @@ -677,6 +679,7 @@ plain ol' Python and is guaranteed to be available. 1 builtins.int 3 builtins.int.as_integer_ratio 2 builtins.int.bit_length + 5 builtins.memoryview.hex 1 builtins.oct Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio', diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst new file mode 100644 index 0000000..e10690b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-25-17-18-26.bpo-22385.VeVvhJ.rst @@ -0,0 +1,4 @@ +The `bytes.hex`, `bytearray.hex`, and `memoryview.hex` methods as well as +the `binascii.hexlify` and `b2a_hex` functions now have the ability to +include an optional separator between hex bytes. This functionality was +inspired by MicroPython's hexlify implementation. diff --git a/Modules/binascii.c b/Modules/binascii.c index d22ab7b..1c7dc35 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1159,19 +1159,33 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) binascii.b2a_hex data: Py_buffer - / + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. Hexadecimal representation of binary data. The return value is a bytes object. This function is also available as "hexlify()". + +Example: +>>> binascii.b2a_hex(b'\xb9\x01\xef') +b'b901ef' +>>> binascii.hexlify(b'\xb9\x01\xef', ':') +b'b9:01:ef' +>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2) +b'b9_01ef' [clinic start generated code]*/ static PyObject * -binascii_b2a_hex_impl(PyObject *module, Py_buffer *data) -/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/ +binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/ { - return _Py_strhex_bytes((const char *)data->buf, data->len); + return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, + sep, bytes_per_sep); } /*[clinic input] @@ -1179,14 +1193,17 @@ binascii.hexlify = binascii.b2a_hex Hexadecimal representation of binary data. -The return value is a bytes object. +The return value is a bytes object. This function is also +available as "b2a_hex()". [clinic start generated code]*/ static PyObject * -binascii_hexlify_impl(PyObject *module, Py_buffer *data) -/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/ +binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/ { - return _Py_strhex_bytes((const char *)data->buf, data->len); + return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, + sep, bytes_per_sep); } /*[clinic input] diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 4043d89..d485048 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -432,34 +432,78 @@ exit: } PyDoc_STRVAR(binascii_b2a_hex__doc__, -"b2a_hex($module, data, /)\n" +"b2a_hex($module, /, data, sep=None, bytes_per_sep=1)\n" "--\n" "\n" "Hexadecimal representation of binary data.\n" "\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" "The return value is a bytes object. This function is also\n" -"available as \"hexlify()\"."); +"available as \"hexlify()\".\n" +"\n" +"Example:\n" +">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\')\n" +"b\'b901ef\'\n" +">>> binascii.hexlify(b\'\\xb9\\x01\\xef\', \':\')\n" +"b\'b9:01:ef\'\n" +">>> binascii.b2a_hex(b\'\\xb9\\x01\\xef\', b\'_\', 2)\n" +"b\'b9_01ef\'"); #define BINASCII_B2A_HEX_METHODDEF \ - {"b2a_hex", (PyCFunction)binascii_b2a_hex, METH_O, binascii_b2a_hex__doc__}, + {"b2a_hex", (PyCFunction)(void(*)(void))binascii_b2a_hex, METH_FASTCALL|METH_KEYWORDS, binascii_b2a_hex__doc__}, static PyObject * -binascii_b2a_hex_impl(PyObject *module, Py_buffer *data); +binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep); static PyObject * -binascii_b2a_hex(PyObject *module, PyObject *arg) +binascii_b2a_hex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "b2a_hex", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + PyObject *sep = NULL; + int bytes_per_sep = 1; - if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { goto exit; } if (!PyBuffer_IsContiguous(&data, 'C')) { - _PyArg_BadArgument("b2a_hex", 0, "contiguous buffer", arg); + _PyArg_BadArgument("b2a_hex", 1, "contiguous buffer", args[0]); goto exit; } - return_value = binascii_b2a_hex_impl(module, &data); + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + sep = args[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[2])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[2]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = binascii_b2a_hex_impl(module, &data, sep, bytes_per_sep); exit: /* Cleanup for data */ @@ -471,33 +515,70 @@ exit: } PyDoc_STRVAR(binascii_hexlify__doc__, -"hexlify($module, data, /)\n" +"hexlify($module, /, data, sep=None, bytes_per_sep=1)\n" "--\n" "\n" "Hexadecimal representation of binary data.\n" "\n" -"The return value is a bytes object."); +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"The return value is a bytes object. This function is also\n" +"available as \"b2a_hex()\"."); #define BINASCII_HEXLIFY_METHODDEF \ - {"hexlify", (PyCFunction)binascii_hexlify, METH_O, binascii_hexlify__doc__}, + {"hexlify", (PyCFunction)(void(*)(void))binascii_hexlify, METH_FASTCALL|METH_KEYWORDS, binascii_hexlify__doc__}, static PyObject * -binascii_hexlify_impl(PyObject *module, Py_buffer *data); +binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep, + int bytes_per_sep); static PyObject * -binascii_hexlify(PyObject *module, PyObject *arg) +binascii_hexlify(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"data", "sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hexlify", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + PyObject *sep = NULL; + int bytes_per_sep = 1; - if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { goto exit; } if (!PyBuffer_IsContiguous(&data, 'C')) { - _PyArg_BadArgument("hexlify", 0, "contiguous buffer", arg); + _PyArg_BadArgument("hexlify", 1, "contiguous buffer", args[0]); goto exit; } - return_value = binascii_hexlify_impl(module, &data); + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + sep = args[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[2])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[2]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = binascii_hexlify_impl(module, &data, sep, bytes_per_sep); exit: /* Cleanup for data */ @@ -720,4 +801,4 @@ exit: return return_value; } -/*[clinic end generated code: output=a4a38e162605aca2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f7b8049edb130c63 input=a9049054013a1b77]*/ diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index c7c2831..b9fcc01 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2020,18 +2020,36 @@ bytearray_fromhex_impl(PyTypeObject *type, PyObject *string) return result; } -PyDoc_STRVAR(hex__doc__, -"B.hex() -> string\n\ -\n\ -Create a string of hexadecimal numbers from a bytearray object.\n\ -Example: bytearray([0xb9, 0x01, 0xef]).hex() -> 'b901ef'."); +/*[clinic input] +bytearray.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Create a str of hexadecimal numbers from a bytearray object. + +Example: +>>> value = bytearray([0xb9, 0x01, 0xef]) +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ static PyObject * -bytearray_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) +bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep) +/*[clinic end generated code: output=29c4e5ef72c565a0 input=814c15830ac8c4b5]*/ { char* argbuf = PyByteArray_AS_STRING(self); Py_ssize_t arglen = PyByteArray_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); + return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep); } static PyObject * @@ -2160,7 +2178,7 @@ bytearray_methods[] = { {"find", (PyCFunction)bytearray_find, METH_VARARGS, _Py_find__doc__}, BYTEARRAY_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__}, + BYTEARRAY_HEX_METHODDEF {"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__}, BYTEARRAY_INSERT_METHODDEF {"isalnum", stringlib_isalnum, METH_NOARGS, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 6f34037..bf7c7da 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2416,18 +2416,36 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) return NULL; } -PyDoc_STRVAR(hex__doc__, -"B.hex() -> string\n\ -\n\ -Create a string of hexadecimal numbers from a bytes object.\n\ -Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); +/*[clinic input] +bytes.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Create a str of hexadecimal numbers from a bytes object. + +Example: +>>> value = b'\xb9\x01\xef' +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ static PyObject * -bytes_hex(PyBytesObject *self, PyObject *Py_UNUSED(ignored)) +bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep) +/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/ { char* argbuf = PyBytes_AS_STRING(self); Py_ssize_t arglen = PyBytes_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); + return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep); } static PyObject * @@ -2452,7 +2470,7 @@ bytes_methods[] = { {"find", (PyCFunction)bytes_find, METH_VARARGS, _Py_find__doc__}, BYTES_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, + BYTES_HEX_METHODDEF {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__}, {"isalnum", stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index da1dc6a..08c6eb5 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -867,6 +867,75 @@ exit: return return_value; } +PyDoc_STRVAR(bytearray_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Create a str of hexadecimal numbers from a bytearray object.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = bytearray([0xb9, 0x01, 0xef])\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define BYTEARRAY_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))bytearray_hex, METH_FASTCALL|METH_KEYWORDS, bytearray_hex__doc__}, + +static PyObject * +bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep); + +static PyObject * +bytearray_hex(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = bytearray_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} + PyDoc_STRVAR(bytearray_reduce__doc__, "__reduce__($self, /)\n" "--\n" @@ -942,4 +1011,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=272fcb836b92da32 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7848247e5469ba1b input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index b030783..69c3506 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -686,4 +686,73 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=af9f51b9b185567d input=a9049054013a1b77]*/ + +PyDoc_STRVAR(bytes_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Create a str of hexadecimal numbers from a bytes object.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = b\'\\xb9\\x01\\xef\'\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define BYTES_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))bytes_hex, METH_FASTCALL|METH_KEYWORDS, bytes_hex__doc__}, + +static PyObject * +bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep); + +static PyObject * +bytes_hex(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = bytes_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} +/*[clinic end generated code: output=2d0a3733e13e753a input=a9049054013a1b77]*/ diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h new file mode 100644 index 0000000..64fce10 --- /dev/null +++ b/Objects/clinic/memoryobject.c.h @@ -0,0 +1,74 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(memoryview_hex__doc__, +"hex($self, /, sep=None, bytes_per_sep=1)\n" +"--\n" +"\n" +"Return the data in the buffer as a str of hexadecimal numbers.\n" +"\n" +" sep\n" +" An optional single character or byte to separate hex bytes.\n" +" bytes_per_sep\n" +" How many bytes between separators. Positive values count from the\n" +" right, negative values count from the left.\n" +"\n" +"Example:\n" +">>> value = memoryview(b\'\\xb9\\x01\\xef\')\n" +">>> value.hex()\n" +"\'b901ef\'\n" +">>> value.hex(\':\')\n" +"\'b9:01:ef\'\n" +">>> value.hex(\':\', 2)\n" +"\'b9:01ef\'\n" +">>> value.hex(\':\', -2)\n" +"\'b901:ef\'"); + +#define MEMORYVIEW_HEX_METHODDEF \ + {"hex", (PyCFunction)(void(*)(void))memoryview_hex, METH_FASTCALL|METH_KEYWORDS, memoryview_hex__doc__}, + +static PyObject * +memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, + int bytes_per_sep); + +static PyObject * +memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"sep", "bytes_per_sep", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "hex", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *sep = NULL; + int bytes_per_sep = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + sep = args[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + bytes_per_sep = _PyLong_AsInt(args[1]); + if (bytes_per_sep == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = memoryview_hex_impl(self, sep, bytes_per_sep); + +exit: + return return_value; +} +/*[clinic end generated code: output=5e44e2bcf01057b5 input=a9049054013a1b77]*/ diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 6bbb413..3955c58 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -7,6 +7,12 @@ #include "pystrhex.h" #include <stddef.h> +/*[clinic input] +class memoryview "PyMemoryViewObject *" "&PyMemoryView_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2e49d2192835219]*/ + +#include "clinic/memoryobject.c.h" /****************************************************************************/ /* ManagedBuffer Object */ @@ -2160,8 +2166,33 @@ memory_tobytes(PyMemoryViewObject *self, PyObject *args, PyObject *kwds) return bytes; } +/*[clinic input] +memoryview.hex + + sep: object = NULL + An optional single character or byte to separate hex bytes. + bytes_per_sep: int = 1 + How many bytes between separators. Positive values count from the + right, negative values count from the left. + +Return the data in the buffer as a str of hexadecimal numbers. + +Example: +>>> value = memoryview(b'\xb9\x01\xef') +>>> value.hex() +'b901ef' +>>> value.hex(':') +'b9:01:ef' +>>> value.hex(':', 2) +'b9:01ef' +>>> value.hex(':', -2) +'b901:ef' +[clinic start generated code]*/ + static PyObject * -memory_hex(PyMemoryViewObject *self, PyObject *dummy) +memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, + int bytes_per_sep) +/*[clinic end generated code: output=430ca760f94f3ca7 input=539f6a3a5fb56946]*/ { Py_buffer *src = VIEW_ADDR(self); PyObject *bytes; @@ -2170,7 +2201,7 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy) CHECK_RELEASED(self); if (MV_C_CONTIGUOUS(self->flags)) { - return _Py_strhex(src->buf, src->len); + return _Py_strhex_with_sep(src->buf, src->len, sep, bytes_per_sep); } bytes = PyBytes_FromStringAndSize(NULL, src->len); @@ -2182,7 +2213,9 @@ memory_hex(PyMemoryViewObject *self, PyObject *dummy) return NULL; } - ret = _Py_strhex(PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes)); + ret = _Py_strhex_with_sep( + PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes), + sep, bytes_per_sep); Py_DECREF(bytes); return ret; @@ -3090,10 +3123,6 @@ When order is 'C' or 'F', the data of the original array is converted to C or\n\ Fortran order. For contiguous views, 'A' returns an exact copy of the physical\n\ memory. In particular, in-memory Fortran order is preserved. For non-contiguous\n\ views, the data is converted to C first. order=None is the same as order='C'."); -PyDoc_STRVAR(memory_hex_doc, -"hex($self, /)\n--\n\ -\n\ -Return the data in the buffer as a string of hexadecimal numbers."); PyDoc_STRVAR(memory_tolist_doc, "tolist($self, /)\n--\n\ \n\ @@ -3110,7 +3139,7 @@ Return a readonly version of the memoryview."); static PyMethodDef memory_methods[] = { {"release", (PyCFunction)memory_release, METH_NOARGS, memory_release_doc}, {"tobytes", (PyCFunction)(void(*)(void))memory_tobytes, METH_VARARGS|METH_KEYWORDS, memory_tobytes_doc}, - {"hex", (PyCFunction)memory_hex, METH_NOARGS, memory_hex_doc}, + MEMORYVIEW_HEX_METHODDEF {"tolist", (PyCFunction)memory_tolist, METH_NOARGS, memory_tolist_doc}, {"cast", (PyCFunction)(void(*)(void))memory_cast, METH_VARARGS|METH_KEYWORDS, memory_cast_doc}, {"toreadonly", (PyCFunction)memory_toreadonly, METH_NOARGS, memory_toreadonly_doc}, diff --git a/Python/pystrhex.c b/Python/pystrhex.c index 028f187..695a3c3 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -5,40 +5,96 @@ #include "pystrhex.h" static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, - int return_bytes) + const PyObject* sep, int bytes_per_sep_group, + const int return_bytes) { PyObject *retval; Py_UCS1* retbuf; - Py_ssize_t i, j; + Py_ssize_t i, j, resultlen = 0; + Py_UCS1 sep_char; + unsigned int abs_bytes_per_sep; + + if (sep) { + Py_ssize_t seplen = PyObject_Length(sep); + if (seplen < 0) { + return NULL; + } + if (seplen != 1) { + PyErr_SetString(PyExc_ValueError, "sep must be length 1."); + return NULL; + } + if (PyUnicode_Check(sep)) { + if (PyUnicode_READY(sep)) + return NULL; + if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) { + PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); + return NULL; + } + sep_char = PyUnicode_READ_CHAR(sep, 0); + } else if (PyBytes_Check(sep)) { + sep_char = PyBytes_AS_STRING(sep)[0]; + } else { + PyErr_SetString(PyExc_TypeError, "sep must be str or bytes."); + return NULL; + } + if (sep_char > 127 && !return_bytes) { + PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); + return NULL; + } + } else { + bytes_per_sep_group = 0; + } assert(arglen >= 0); - if (arglen > PY_SSIZE_T_MAX / 2) + abs_bytes_per_sep = abs(bytes_per_sep_group); + if (bytes_per_sep_group && arglen > 0) { + /* How many sep characters we'll be inserting. */ + resultlen = (arglen - 1) / abs_bytes_per_sep; + } + /* Bounds checking for our Py_ssize_t indices. */ + if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) { return PyErr_NoMemory(); + } + resultlen += arglen * 2; + + if (abs_bytes_per_sep >= arglen) { + bytes_per_sep_group = 0; + abs_bytes_per_sep = 0; + } if (return_bytes) { /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */ - retbuf = (Py_UCS1*) PyMem_Malloc(arglen*2); + retbuf = (Py_UCS1*) PyMem_Malloc(resultlen); if (!retbuf) return PyErr_NoMemory(); retval = NULL; /* silence a compiler warning, assigned later. */ } else { - retval = PyUnicode_New(arglen*2, 127); + retval = PyUnicode_New(resultlen, 127); if (!retval) return NULL; retbuf = PyUnicode_1BYTE_DATA(retval); } - /* make hex version of string, taken from shamodule.c */ - for (i=j=0; i < arglen; i++) { + /* Hexlify */ + for (i=j=0; i < arglen; ++i) { + assert(j < resultlen); unsigned char c; c = (argbuf[i] >> 4) & 0xf; retbuf[j++] = Py_hexdigits[c]; c = argbuf[i] & 0xf; retbuf[j++] = Py_hexdigits[c]; + if (bytes_per_sep_group && i < arglen - 1) { + Py_ssize_t anchor; + anchor = (bytes_per_sep_group > 0) ? (arglen - 1 - i) : (i + 1); + if (anchor % abs_bytes_per_sep == 0) { + retbuf[j++] = sep_char; + } + } } + assert(j == resultlen); if (return_bytes) { - retval = PyBytes_FromStringAndSize((const char *)retbuf, arglen*2); + retval = PyBytes_FromStringAndSize((const char *)retbuf, resultlen); PyMem_Free(retbuf); } #ifdef Py_DEBUG @@ -52,12 +108,26 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen) { - return _Py_strhex_impl(argbuf, arglen, 0); + return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0); } /* Same as above but returns a bytes() instead of str() to avoid the * need to decode the str() when bytes are needed. */ PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen) { - return _Py_strhex_impl(argbuf, arglen, 1); + return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1); +} + +/* These variants include support for a separator between every N bytes: */ + +PyObject * _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group) +{ + return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0); +} + +/* Same as above but returns a bytes() instead of str() to avoid the + * need to decode the str() when bytes are needed. */ +PyObject * _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group) +{ + return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1); } |