gh-129349: Accept bytes in bytes.fromhex()/bytearray.fromhex() (#129844)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
author: Daniel Pope <lordmauve@users.noreply.github.com> 2025-03-12 10:40:11 (GMT)
committer: GitHub <noreply@github.com> 2025-03-12 10:40:11 (GMT)
commit: e0637cebe5bf863897f2e89dfcb76be0015c1877 (patch)
tree: 54f308a209c3f9ff3210df7a55450ed5e84df7cf
parent: 405a2d74cbdef5a899c900b6897ec85fe465abd2 (diff)
download: cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.zip
cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.tar.gz
cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.tar.bz2
8 files changed, 90 insertions, 69 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index a6260ec..7b3fa21 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -2744,6 +2744,10 @@ data and are closely related to string objects in a variety of other ways.
          :meth:`bytes.fromhex` now skips all ASCII whitespace in the string,
          not just spaces.
 
+      .. versionchanged:: next
+         :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and
+         :term:`bytes-like objects <bytes-like object>` as input.
+
    A reverse conversion function exists to transform a bytes object into its
    hexadecimal representation.
 
@@ -2829,6 +2833,10 @@ objects.
          :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string,
          not just spaces.
 
+      .. versionchanged:: next
+         :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and
+         :term:`bytes-like objects <bytes-like object>` as input.
+
    A reverse conversion function exists to transform a bytearray object into its
    hexadecimal representation.
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 6539b23..6898b50 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -354,6 +354,10 @@ Other language changes
   (with :func:`format` or :ref:`f-strings`).
   (Contrubuted by Sergey B Kirpichev in :gh:`87790`.)
 
+* The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept
+  ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
+  (Contributed by Daniel Pope in :gh:`129349`.)
+
 * ``\B`` in :mod:`regular expression <re>` now matches empty input string.
   Now it is always the opposite of ``\b``.
   (Contributed by Serhiy Storchaka in :gh:`124130`.)
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index f6ffe83..d5490a2 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -450,13 +450,34 @@ class BaseBytesTest:
 
         # check that ASCII whitespace is ignored
         self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b)
+        self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b)
         for c in "\x09\x0A\x0B\x0C\x0D\x20":
             self.assertEqual(self.type2test.fromhex(c), self.type2test())
         for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028":
             self.assertRaises(ValueError, self.type2test.fromhex, c)
 
+        # Check that bytes, bytearray and other bytes-like objects are parsed
+        tests = [
+            ("bytes", bytes),
+            ("bytearray", bytearray),
+            ("memoryview", memoryview),
+            ("array.array", lambda bs: array.array('B', bs)),
+        ]
+        for name, factory in tests:
+            with self.subTest(name=name):
+                self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b)
+
+        # Invalid bytes are rejected
+        for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0":
+            b = bytes([30, 31, u8])
+            self.assertRaises(ValueError, self.type2test.fromhex, b)
+
         self.assertEqual(self.type2test.fromhex('0000'), b'\0\0')
-        self.assertRaises(TypeError, self.type2test.fromhex, b'1B')
+        with self.assertRaisesRegex(
+            TypeError,
+            r'fromhex\(\) argument must be str or bytes-like, not tuple',
+        ):
+            self.type2test.fromhex(())
         self.assertRaises(ValueError, self.type2test.fromhex, 'a')
         self.assertRaises(ValueError, self.type2test.fromhex, 'rt')
         self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd')
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst
new file mode 100644
index 0000000..db2af78
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst
@@ -0,0 +1,2 @@
+:meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accept ASCII
+:class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 34f43eb..f1c7666 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends)
 @classmethod
 bytearray.fromhex
 
-    string: unicode
+    string: object
     /
 
 Create a bytearray object from a string of hexadecimal numbers.
@@ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef')
 [clinic start generated code]*/
 
 static PyObject *
-bytearray_fromhex_impl(PyTypeObject *type, PyObject *string)
-/*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/
+bytearray_fromhex(PyTypeObject *type, PyObject *string)
+/*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/
 {
     PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type);
     if (type != &PyByteArray_Type && result != NULL) {
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index ba642d3..ada0d00 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends)
 @classmethod
 bytes.fromhex
 
-    string: unicode
+    string: object
     /
 
 Create a bytes object from a string of hexadecimal numbers.
@@ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
 [clinic start generated code]*/
 
 static PyObject *
-bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
-/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
+bytes_fromhex(PyTypeObject *type, PyObject *string)
+/*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/
 {
     PyObject *result = _PyBytes_FromHex(string, 0);
     if (type != &PyBytes_Type && result != NULL) {
@@ -2510,37 +2510,55 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
     char *buf;
     Py_ssize_t hexlen, invalid_char;
     unsigned int top, bot;
-    const Py_UCS1 *str, *end;
+    const Py_UCS1 *str, *start, *end;
     _PyBytesWriter writer;
+    Py_buffer view;
+    view.obj = NULL;
 
     _PyBytesWriter_Init(&writer);
     writer.use_bytearray = use_bytearray;
 
-    assert(PyUnicode_Check(string));
-    hexlen = PyUnicode_GET_LENGTH(string);
+    if (PyUnicode_Check(string)) {
+        hexlen = PyUnicode_GET_LENGTH(string);
 
-    if (!PyUnicode_IS_ASCII(string)) {
-        const void *data = PyUnicode_DATA(string);
-        int kind = PyUnicode_KIND(string);
-        Py_ssize_t i;
+        if (!PyUnicode_IS_ASCII(string)) {
+            const void *data = PyUnicode_DATA(string);
+            int kind = PyUnicode_KIND(string);
+            Py_ssize_t i;
 
-        /* search for the first non-ASCII character */
-        for (i = 0; i < hexlen; i++) {
-            if (PyUnicode_READ(kind, data, i) >= 128)
-                break;
+            /* search for the first non-ASCII character */
+            for (i = 0; i < hexlen; i++) {
+                if (PyUnicode_READ(kind, data, i) >= 128)
+                    break;
+            }
+            invalid_char = i;
+            goto error;
         }
-        invalid_char = i;
-        goto error;
-    }
 
-    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
-    str = PyUnicode_1BYTE_DATA(string);
+        assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
+        str = PyUnicode_1BYTE_DATA(string);
+    }
+    else if (PyObject_CheckBuffer(string)) {
+        if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
+            return NULL;
+        }
+        hexlen = view.len;
+        str = view.buf;
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "fromhex() argument must be str or bytes-like, not %T",
+                     string);
+        return NULL;
+    }
 
     /* This overestimates if there are spaces */
     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
-    if (buf == NULL)
-        return NULL;
+    if (buf == NULL) {
+        goto release_buffer;
+    }
 
+    start = str;
     end = str + hexlen;
     while (str < end) {
         /* skip over spaces in the input */
@@ -2554,7 +2572,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
 
         top = _PyLong_DigitValue[*str];
         if (top >= 16) {
-            invalid_char = str - PyUnicode_1BYTE_DATA(string);
+            invalid_char = str - start;
             goto error;
         }
         str++;
@@ -2565,7 +2583,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
             if (str >= end){
                 invalid_char = -1;
             } else {
-                invalid_char = str - PyUnicode_1BYTE_DATA(string);
+                invalid_char = str - start;
             }
             goto error;
         }
@@ -2574,6 +2592,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
         *buf++ = (unsigned char)((top << 4) + bot);
     }
 
+    if (view.obj != NULL) {
+       PyBuffer_Release(&view);
+    }
     return _PyBytesWriter_Finish(&writer, buf);
 
   error:
@@ -2586,6 +2607,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
                      "fromhex() arg at position %zd", invalid_char);
     }
     _PyBytesWriter_Dealloc(&writer);
+
+  release_buffer:
+    if (view.obj != NULL) {
+        PyBuffer_Release(&view);
+    }
     return NULL;
 }
 
diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h
index fa105f7..8ed10d8 100644
--- a/Objects/clinic/bytearrayobject.c.h
+++ b/Objects/clinic/bytearrayobject.c.h
@@ -1601,26 +1601,6 @@ PyDoc_STRVAR(bytearray_fromhex__doc__,
 #define BYTEARRAY_FROMHEX_METHODDEF    \
     {"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__},
 
-static PyObject *
-bytearray_fromhex_impl(PyTypeObject *type, PyObject *string);
-
-static PyObject *
-bytearray_fromhex(PyTypeObject *type, PyObject *arg)
-{
-    PyObject *return_value = NULL;
-    PyObject *string;
-
-    if (!PyUnicode_Check(arg)) {
-        _PyArg_BadArgument("fromhex", "argument", "str", arg);
-        goto exit;
-    }
-    string = arg;
-    return_value = bytearray_fromhex_impl(type, string);
-
-exit:
-    return return_value;
-}
-
 PyDoc_STRVAR(bytearray_hex__doc__,
 "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
 "--\n"
@@ -1789,4 +1769,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
     return bytearray_sizeof_impl((PyByteArrayObject *)self);
 }
-/*[clinic end generated code: output=7c924a56e0a8bfe6 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=13a4231325b7d3c1 input=a9049054013a1b77]*/
diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h
index 11cb81a..c0f61f1 100644
--- a/Objects/clinic/bytesobject.c.h
+++ b/Objects/clinic/bytesobject.c.h
@@ -1204,26 +1204,6 @@ PyDoc_STRVAR(bytes_fromhex__doc__,
 #define BYTES_FROMHEX_METHODDEF    \
     {"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__},
 
-static PyObject *
-bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
-
-static PyObject *
-bytes_fromhex(PyTypeObject *type, PyObject *arg)
-{
-    PyObject *return_value = NULL;
-    PyObject *string;
-
-    if (!PyUnicode_Check(arg)) {
-        _PyArg_BadArgument("fromhex", "argument", "str", arg);
-        goto exit;
-    }
-    string = arg;
-    return_value = bytes_fromhex_impl(type, string);
-
-exit:
-    return return_value;
-}
-
 PyDoc_STRVAR(bytes_hex__doc__,
 "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
 "--\n"
@@ -1404,4 +1384,4 @@ skip_optional_pos:
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=61cb2cf6506df4c6 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=967aae4b46423586 input=a9049054013a1b77]*/
author	Daniel Pope <lordmauve@users.noreply.github.com>	2025-03-12 10:40:11 (GMT)
committer	GitHub <noreply@github.com>	2025-03-12 10:40:11 (GMT)
commit	e0637cebe5bf863897f2e89dfcb76be0015c1877 (patch)
tree	54f308a209c3f9ff3210df7a55450ed5e84df7cf
parent	405a2d74cbdef5a899c900b6897ec85fe465abd2 (diff)
download	cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.zip cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.tar.gz cpython-e0637cebe5bf863897f2e89dfcb76be0015c1877.tar.bz2