summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/stdtypes.rst191
-rw-r--r--Include/bytesobject.h1
-rw-r--r--Include/unicodeobject.h2
-rw-r--r--Lib/test/test_bytes.py44
-rw-r--r--Lib/test/test_format.py385
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/abstract.c5
-rw-r--r--Objects/bytearrayobject.c43
-rw-r--r--Objects/bytesobject.c657
-rw-r--r--Objects/unicodeobject.c12
10 files changed, 1185 insertions, 158 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index a403d6f..55f5351 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -3057,6 +3057,197 @@ place, and instead produce new objects.
always produces a new object, even if no changes were made.
+.. _bytes-formatting:
+
+``printf``-style Bytes Formatting
+----------------------------------
+
+.. index::
+ single: formatting, bytes (%)
+ single: formatting, bytearray (%)
+ single: interpolation, bytes (%)
+ single: interpolation, bytearray (%)
+ single: bytes; formatting
+ single: bytearray; formatting
+ single: bytes; interpolation
+ single: bytearray; interpolation
+ single: printf-style formatting
+ single: sprintf-style formatting
+ single: % formatting
+ single: % interpolation
+
+.. note::
+
+ The formatting operations described here exhibit a variety of quirks that
+ lead to a number of common errors (such as failing to display tuples and
+ dictionaries correctly). If the value being printed may be a tuple or
+ dictionary, wrap it in a tuple.
+
+Bytes objects (``bytes``/``bytearray``) have one unique built-in operation:
+the ``%`` operator (modulo).
+This is also known as the bytes *formatting* or *interpolation* operator.
+Given ``format % values`` (where *format* is a bytes object), ``%`` conversion
+specifications in *format* are replaced with zero or more elements of *values*.
+The effect is similar to using the :c:func:`sprintf` in the C language.
+
+If *format* requires a single argument, *values* may be a single non-tuple
+object. [5]_ Otherwise, *values* must be a tuple with exactly the number of
+items specified by the format bytes object, or a single mapping object (for
+example, a dictionary).
+
+A conversion specifier contains two or more characters and has the following
+components, which must occur in this order:
+
+#. The ``'%'`` character, which marks the start of the specifier.
+
+#. Mapping key (optional), consisting of a parenthesised sequence of characters
+ (for example, ``(somename)``).
+
+#. Conversion flags (optional), which affect the result of some conversion
+ types.
+
+#. Minimum field width (optional). If specified as an ``'*'`` (asterisk), the
+ actual width is read from the next element of the tuple in *values*, and the
+ object to convert comes after the minimum field width and optional precision.
+
+#. Precision (optional), given as a ``'.'`` (dot) followed by the precision. If
+ specified as ``'*'`` (an asterisk), the actual precision is read from the next
+ element of the tuple in *values*, and the value to convert comes after the
+ precision.
+
+#. Length modifier (optional).
+
+#. Conversion type.
+
+When the right argument is a dictionary (or other mapping type), then the
+formats in the bytes object *must* include a parenthesised mapping key into that
+dictionary inserted immediately after the ``'%'`` character. The mapping key
+selects the value to be formatted from the mapping. For example:
+
+ >>> print(b'%(language)s has %(number)03d quote types.' %
+ ... {b'language': b"Python", b"number": 2})
+ b'Python has 002 quote types.'
+
+In this case no ``*`` specifiers may occur in a format (since they require a
+sequential parameter list).
+
+The conversion flag characters are:
+
++---------+---------------------------------------------------------------------+
+| Flag | Meaning |
++=========+=====================================================================+
+| ``'#'`` | The value conversion will use the "alternate form" (where defined |
+| | below). |
++---------+---------------------------------------------------------------------+
+| ``'0'`` | The conversion will be zero padded for numeric values. |
++---------+---------------------------------------------------------------------+
+| ``'-'`` | The converted value is left adjusted (overrides the ``'0'`` |
+| | conversion if both are given). |
++---------+---------------------------------------------------------------------+
+| ``' '`` | (a space) A blank should be left before a positive number (or empty |
+| | string) produced by a signed conversion. |
++---------+---------------------------------------------------------------------+
+| ``'+'`` | A sign character (``'+'`` or ``'-'``) will precede the conversion |
+| | (overrides a "space" flag). |
++---------+---------------------------------------------------------------------+
+
+A length modifier (``h``, ``l``, or ``L``) may be present, but is ignored as it
+is not necessary for Python -- so e.g. ``%ld`` is identical to ``%d``.
+
+The conversion types are:
+
++------------+-----------------------------------------------------+-------+
+| Conversion | Meaning | Notes |
++============+=====================================================+=======+
+| ``'d'`` | Signed integer decimal. | |
++------------+-----------------------------------------------------+-------+
+| ``'i'`` | Signed integer decimal. | |
++------------+-----------------------------------------------------+-------+
+| ``'o'`` | Signed octal value. | \(1) |
++------------+-----------------------------------------------------+-------+
+| ``'u'`` | Obsolete type -- it is identical to ``'d'``. | \(7) |
++------------+-----------------------------------------------------+-------+
+| ``'x'`` | Signed hexadecimal (lowercase). | \(2) |
++------------+-----------------------------------------------------+-------+
+| ``'X'`` | Signed hexadecimal (uppercase). | \(2) |
++------------+-----------------------------------------------------+-------+
+| ``'e'`` | Floating point exponential format (lowercase). | \(3) |
++------------+-----------------------------------------------------+-------+
+| ``'E'`` | Floating point exponential format (uppercase). | \(3) |
++------------+-----------------------------------------------------+-------+
+| ``'f'`` | Floating point decimal format. | \(3) |
++------------+-----------------------------------------------------+-------+
+| ``'F'`` | Floating point decimal format. | \(3) |
++------------+-----------------------------------------------------+-------+
+| ``'g'`` | Floating point format. Uses lowercase exponential | \(4) |
+| | format if exponent is less than -4 or not less than | |
+| | precision, decimal format otherwise. | |
++------------+-----------------------------------------------------+-------+
+| ``'G'`` | Floating point format. Uses uppercase exponential | \(4) |
+| | format if exponent is less than -4 or not less than | |
+| | precision, decimal format otherwise. | |
++------------+-----------------------------------------------------+-------+
+| ``'c'`` | Single byte (accepts integer or single | |
+| | byte objects). | |
++------------+-----------------------------------------------------+-------+
+| ``'b'`` | Bytes (any object that follows the | \(5) |
+| | :ref:`buffer protocol <bufferobjects>` or has | |
+| | :meth:`__bytes__`). | |
++------------+-----------------------------------------------------+-------+
+| ``'s'`` | ``'s'`` is an alias for ``'b'`` and should only | \(6) |
+| | be used for Python2/3 code bases. | |
++------------+-----------------------------------------------------+-------+
+| ``'a'`` | Bytes (converts any Python object using | \(5) |
+| | ``repr(obj).encode('ascii','backslashreplace')``). | |
++------------+-----------------------------------------------------+-------+
+| ``'%'`` | No argument is converted, results in a ``'%'`` | |
+| | character in the result. | |
++------------+-----------------------------------------------------+-------+
+
+Notes:
+
+(1)
+ The alternate form causes a leading octal specifier (``'0o'``) to be
+ inserted before the first digit, even when that digit is already a zero
+ (``b'%#o' % 0`` gives ``b'0o0'``).
+
+(2)
+ The alternate form causes a leading ``'0x'`` or ``'0X'`` (depending on whether
+ the ``'x'`` or ``'X'`` format was used) to be inserted before the first
+ digit, even when that digit is already a zero (``b'%#x' % 0`` gives
+ ``b'0x0'``).
+
+(3)
+ The alternate form causes the result to always contain a decimal point, even if
+ no digits follow it.
+
+ The precision determines the number of digits after the decimal point and
+ defaults to 6.
+
+(4)
+ The alternate form causes the result to always contain a decimal point, and
+ trailing zeroes are not removed as they would otherwise be.
+
+ The precision determines the number of significant digits before and after the
+ decimal point and defaults to 6.
+
+(5)
+ If precision is ``N``, the output is truncated to ``N`` characters.
+
+(6)
+ ``b'%s'`` is deprecated, but will not be removed during the 3.x series.
+
+(7)
+ See :pep:`237`.
+
+.. note::
+
+ The bytearray version of this method does *not* operate in place - it
+ always produces a new object, even if no changes were made.
+
+.. seealso:: :pep:`461`.
+.. versionadded:: 3.5
+
.. _typememoryview:
Memory Views
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
index 0ee8d36..e379bac 100644
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@@ -62,6 +62,7 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
+PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *);
#endif
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 729f584..d2ffabe 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -2245,6 +2245,8 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Py_UNICODE c
);
+PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
+
/* Create a copy of a unicode string ending with a nul character. Return NULL
and raise a MemoryError exception on memory allocation failure, otherwise
return a new allocated buffer (use PyMem_Free() to free the buffer). */
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index e8dde5d..995a908 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -461,6 +461,28 @@ class BaseBytesTest:
self.assertEqual(b.rindex(i, 3, 9), 7)
self.assertRaises(ValueError, b.rindex, w, 1, 3)
+    def test_mod(self):
+        # The % operator must return a new object and leave the format alone.
+        b = b'hello, %b!'
+        orig = b
+        b = b % b'world'
+        self.assertEqual(b, b'hello, world!')
+        self.assertEqual(orig, b'hello, %b!')
+        self.assertIsNot(b, orig)
+        # Tuple argument: %s, %d and a literal %% in a single format.
+        b = b'%s / 100 = %d%%'
+        a = b % (b'seventy-nine', 79)
+        self.assertEqual(a, b'seventy-nine / 100 = 79%')
+
+    def test_imod(self):
+        # %= rebinds the name to a new object; the original format object
+        # referenced by ``orig`` must be unchanged.
+        b = b'hello, %b!'
+        orig = b
+        b %= b'world'
+        self.assertEqual(b, b'hello, world!')
+        self.assertEqual(orig, b'hello, %b!')
+        self.assertIsNot(b, orig)
+        # Tuple argument: %s, %d and a literal %% in a single format.
+        b = b'%s / 100 = %d%%'
+        b %= (b'seventy-nine', 79)
+        self.assertEqual(b, b'seventy-nine / 100 = 79%')
+
def test_replace(self):
b = self.type2test(b'mississippi')
self.assertEqual(b.replace(b'i', b'a'), b'massassappa')
@@ -990,6 +1012,28 @@ class ByteArrayTest(BaseBytesTest, unittest.TestCase):
b[8:] = b
self.assertEqual(b, bytearray(list(range(8)) + list(range(256))))
+    def test_mod(self):
+        # bytearray % args must build a new bytearray and leave the format
+        # bytearray unmodified.
+        b = bytearray(b'hello, %b!')
+        orig = b
+        b = b % b'world'
+        self.assertEqual(b, b'hello, world!')
+        self.assertEqual(orig, bytearray(b'hello, %b!'))
+        self.assertIsNot(b, orig)
+        # Tuple argument: %s, %d and a literal %% in a single format.
+        b = bytearray(b'%s / 100 = %d%%')
+        a = b % (b'seventy-nine', 79)
+        self.assertEqual(a, bytearray(b'seventy-nine / 100 = 79%'))
+
+    def test_imod(self):
+        # %= on a bytearray must not format in place: the object still
+        # referenced by ``orig`` keeps the original format text.
+        b = bytearray(b'hello, %b!')
+        orig = b
+        b %= b'world'
+        self.assertEqual(b, b'hello, world!')
+        self.assertEqual(orig, bytearray(b'hello, %b!'))
+        self.assertIsNot(b, orig)
+        # Tuple argument: %s, %d and a literal %% in a single format.
+        b = bytearray(b'%s / 100 = %d%%')
+        b %= (b'seventy-nine', 79)
+        self.assertEqual(b, bytearray(b'seventy-nine / 100 = 79%'))
+
def test_iconcat(self):
b = bytearray(b"abc")
b1 = b
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
index 631bf35..9d1f5d3 100644
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -9,7 +9,7 @@ maxsize = support.MAX_Py_ssize_t
# test string formatting operator (I am not sure if this is being tested
# elsewhere but, surely, some of the given cases are *not* tested because
# they crash python)
-# test on unicode strings as well
+# test on bytes object as well
def testformat(formatstr, args, output=None, limit=None, overflowok=False):
if verbose:
@@ -46,181 +46,209 @@ def testformat(formatstr, args, output=None, limit=None, overflowok=False):
if verbose:
print('yes')
+def testcommon(formatstr, args, output=None, limit=None, overflowok=False):
+    # Run testformat() on every flavour of the same format:
+    # if formatstr is a str, test str, bytes, and bytearray (the bytes
+    # versions are the ASCII encoding of the str);
+    # otherwise, test bytes and bytearray only.
+    if isinstance(formatstr, str):
+        # The str flavour receives args and output exactly as given.
+        testformat(formatstr, args, output, limit, overflowok)
+        b_format = formatstr.encode('ascii')
+    else:
+        b_format = formatstr
+    ba_format = bytearray(b_format)
+    # The bytes flavours always receive their arguments as a tuple.
+    b_args = tuple(args) if isinstance(args, tuple) else (args, )
+    if output is None:
+        b_output = ba_output = None
+    else:
+        # Expected output follows the same str -> bytes -> bytearray chain.
+        if isinstance(output, str):
+            b_output = output.encode('ascii')
+        else:
+            b_output = output
+        ba_output = bytearray(b_output)
+    testformat(b_format, b_args, b_output, limit, overflowok)
+    testformat(ba_format, b_args, ba_output, limit, overflowok)
+
class FormatTest(unittest.TestCase):
- def test_format(self):
- testformat("%.1d", (1,), "1")
- testformat("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow
- testformat("%.100d", (1,), '00000000000000000000000000000000000000'
+
+ def test_common_format(self):
+ # test the format identifiers that work the same across
+ # str, bytes, and bytearrays (integer, float, oct, hex)
+ testcommon("%.1d", (1,), "1")
+ testcommon("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow
+ testcommon("%.100d", (1,), '00000000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000'
'00000001', overflowok=True)
- testformat("%#.117x", (1,), '0x00000000000000000000000000000000000'
+ testcommon("%#.117x", (1,), '0x00000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000'
'0000000000000000000000000001',
overflowok=True)
- testformat("%#.118x", (1,), '0x00000000000000000000000000000000000'
+ testcommon("%#.118x", (1,), '0x00000000000000000000000000000000000'
'000000000000000000000000000000000000000000000000000000'
'00000000000000000000000000001',
overflowok=True)
- testformat("%f", (1.0,), "1.000000")
+ testcommon("%f", (1.0,), "1.000000")
# these are trying to test the limits of the internal magic-number-length
# formatting buffer, if that number changes then these tests are less
# effective
- testformat("%#.*g", (109, -1.e+49/3.))
- testformat("%#.*g", (110, -1.e+49/3.))
- testformat("%#.*g", (110, -1.e+100/3.))
+ testcommon("%#.*g", (109, -1.e+49/3.))
+ testcommon("%#.*g", (110, -1.e+49/3.))
+ testcommon("%#.*g", (110, -1.e+100/3.))
# test some ridiculously large precision, expect overflow
- testformat('%12.*f', (123456, 1.0))
+ testcommon('%12.*f', (123456, 1.0))
# check for internal overflow validation on length of precision
# these tests should no longer cause overflow in Python
# 2.7/3.1 and later.
- testformat("%#.*g", (110, -1.e+100/3.))
- testformat("%#.*G", (110, -1.e+100/3.))
- testformat("%#.*f", (110, -1.e+100/3.))
- testformat("%#.*F", (110, -1.e+100/3.))
+ testcommon("%#.*g", (110, -1.e+100/3.))
+ testcommon("%#.*G", (110, -1.e+100/3.))
+ testcommon("%#.*f", (110, -1.e+100/3.))
+ testcommon("%#.*F", (110, -1.e+100/3.))
# Formatting of integers. Overflow is not ok
- testformat("%x", 10, "a")
- testformat("%x", 100000000000, "174876e800")
- testformat("%o", 10, "12")
- testformat("%o", 100000000000, "1351035564000")
- testformat("%d", 10, "10")
- testformat("%d", 100000000000, "100000000000")
+ testcommon("%x", 10, "a")
+ testcommon("%x", 100000000000, "174876e800")
+ testcommon("%o", 10, "12")
+ testcommon("%o", 100000000000, "1351035564000")
+ testcommon("%d", 10, "10")
+ testcommon("%d", 100000000000, "100000000000")
big = 123456789012345678901234567890
- testformat("%d", big, "123456789012345678901234567890")
- testformat("%d", -big, "-123456789012345678901234567890")
- testformat("%5d", -big, "-123456789012345678901234567890")
- testformat("%31d", -big, "-123456789012345678901234567890")
- testformat("%32d", -big, " -123456789012345678901234567890")
- testformat("%-32d", -big, "-123456789012345678901234567890 ")
- testformat("%032d", -big, "-0123456789012345678901234567890")
- testformat("%-032d", -big, "-123456789012345678901234567890 ")
- testformat("%034d", -big, "-000123456789012345678901234567890")
- testformat("%034d", big, "0000123456789012345678901234567890")
- testformat("%0+34d", big, "+000123456789012345678901234567890")
- testformat("%+34d", big, " +123456789012345678901234567890")
- testformat("%34d", big, " 123456789012345678901234567890")
- testformat("%.2d", big, "123456789012345678901234567890")
- testformat("%.30d", big, "123456789012345678901234567890")
- testformat("%.31d", big, "0123456789012345678901234567890")
- testformat("%32.31d", big, " 0123456789012345678901234567890")
- testformat("%d", float(big), "123456________________________", 6)
+ testcommon("%d", big, "123456789012345678901234567890")
+ testcommon("%d", -big, "-123456789012345678901234567890")
+ testcommon("%5d", -big, "-123456789012345678901234567890")
+ testcommon("%31d", -big, "-123456789012345678901234567890")
+ testcommon("%32d", -big, " -123456789012345678901234567890")
+ testcommon("%-32d", -big, "-123456789012345678901234567890 ")
+ testcommon("%032d", -big, "-0123456789012345678901234567890")
+ testcommon("%-032d", -big, "-123456789012345678901234567890 ")
+ testcommon("%034d", -big, "-000123456789012345678901234567890")
+ testcommon("%034d", big, "0000123456789012345678901234567890")
+ testcommon("%0+34d", big, "+000123456789012345678901234567890")
+ testcommon("%+34d", big, " +123456789012345678901234567890")
+ testcommon("%34d", big, " 123456789012345678901234567890")
+ testcommon("%.2d", big, "123456789012345678901234567890")
+ testcommon("%.30d", big, "123456789012345678901234567890")
+ testcommon("%.31d", big, "0123456789012345678901234567890")
+ testcommon("%32.31d", big, " 0123456789012345678901234567890")
+ testcommon("%d", float(big), "123456________________________", 6)
big = 0x1234567890abcdef12345 # 21 hex digits
- testformat("%x", big, "1234567890abcdef12345")
- testformat("%x", -big, "-1234567890abcdef12345")
- testformat("%5x", -big, "-1234567890abcdef12345")
- testformat("%22x", -big, "-1234567890abcdef12345")
- testformat("%23x", -big, " -1234567890abcdef12345")
- testformat("%-23x", -big, "-1234567890abcdef12345 ")
- testformat("%023x", -big, "-01234567890abcdef12345")
- testformat("%-023x", -big, "-1234567890abcdef12345 ")
- testformat("%025x", -big, "-0001234567890abcdef12345")
- testformat("%025x", big, "00001234567890abcdef12345")
- testformat("%0+25x", big, "+0001234567890abcdef12345")
- testformat("%+25x", big, " +1234567890abcdef12345")
- testformat("%25x", big, " 1234567890abcdef12345")
- testformat("%.2x", big, "1234567890abcdef12345")
- testformat("%.21x", big, "1234567890abcdef12345")
- testformat("%.22x", big, "01234567890abcdef12345")
- testformat("%23.22x", big, " 01234567890abcdef12345")
- testformat("%-23.22x", big, "01234567890abcdef12345 ")
- testformat("%X", big, "1234567890ABCDEF12345")
- testformat("%#X", big, "0X1234567890ABCDEF12345")
- testformat("%#x", big, "0x1234567890abcdef12345")
- testformat("%#x", -big, "-0x1234567890abcdef12345")
- testformat("%#.23x", -big, "-0x001234567890abcdef12345")
- testformat("%#+.23x", big, "+0x001234567890abcdef12345")
- testformat("%# .23x", big, " 0x001234567890abcdef12345")
- testformat("%#+.23X", big, "+0X001234567890ABCDEF12345")
- testformat("%#-+.23X", big, "+0X001234567890ABCDEF12345")
- testformat("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
- testformat("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
- testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+ testcommon("%x", big, "1234567890abcdef12345")
+ testcommon("%x", -big, "-1234567890abcdef12345")
+ testcommon("%5x", -big, "-1234567890abcdef12345")
+ testcommon("%22x", -big, "-1234567890abcdef12345")
+ testcommon("%23x", -big, " -1234567890abcdef12345")
+ testcommon("%-23x", -big, "-1234567890abcdef12345 ")
+ testcommon("%023x", -big, "-01234567890abcdef12345")
+ testcommon("%-023x", -big, "-1234567890abcdef12345 ")
+ testcommon("%025x", -big, "-0001234567890abcdef12345")
+ testcommon("%025x", big, "00001234567890abcdef12345")
+ testcommon("%0+25x", big, "+0001234567890abcdef12345")
+ testcommon("%+25x", big, " +1234567890abcdef12345")
+ testcommon("%25x", big, " 1234567890abcdef12345")
+ testcommon("%.2x", big, "1234567890abcdef12345")
+ testcommon("%.21x", big, "1234567890abcdef12345")
+ testcommon("%.22x", big, "01234567890abcdef12345")
+ testcommon("%23.22x", big, " 01234567890abcdef12345")
+ testcommon("%-23.22x", big, "01234567890abcdef12345 ")
+ testcommon("%X", big, "1234567890ABCDEF12345")
+ testcommon("%#X", big, "0X1234567890ABCDEF12345")
+ testcommon("%#x", big, "0x1234567890abcdef12345")
+ testcommon("%#x", -big, "-0x1234567890abcdef12345")
+ testcommon("%#.23x", -big, "-0x001234567890abcdef12345")
+ testcommon("%#+.23x", big, "+0x001234567890abcdef12345")
+ testcommon("%# .23x", big, " 0x001234567890abcdef12345")
+ testcommon("%#+.23X", big, "+0X001234567890ABCDEF12345")
+ testcommon("%#-+.23X", big, "+0X001234567890ABCDEF12345")
+ testcommon("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
+ testcommon("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
+ testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")
# next one gets two leading zeroes from precision, and another from the
# 0 flag and the width
- testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
+ testcommon("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
# same, except no 0 flag
- testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345")
+ testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")
big = 0o12345670123456701234567012345670 # 32 octal digits
- testformat("%o", big, "12345670123456701234567012345670")
- testformat("%o", -big, "-12345670123456701234567012345670")
- testformat("%5o", -big, "-12345670123456701234567012345670")
- testformat("%33o", -big, "-12345670123456701234567012345670")
- testformat("%34o", -big, " -12345670123456701234567012345670")
- testformat("%-34o", -big, "-12345670123456701234567012345670 ")
- testformat("%034o", -big, "-012345670123456701234567012345670")
- testformat("%-034o", -big, "-12345670123456701234567012345670 ")
- testformat("%036o", -big, "-00012345670123456701234567012345670")
- testformat("%036o", big, "000012345670123456701234567012345670")
- testformat("%0+36o", big, "+00012345670123456701234567012345670")
- testformat("%+36o", big, " +12345670123456701234567012345670")
- testformat("%36o", big, " 12345670123456701234567012345670")
- testformat("%.2o", big, "12345670123456701234567012345670")
- testformat("%.32o", big, "12345670123456701234567012345670")
- testformat("%.33o", big, "012345670123456701234567012345670")
- testformat("%34.33o", big, " 012345670123456701234567012345670")
- testformat("%-34.33o", big, "012345670123456701234567012345670 ")
- testformat("%o", big, "12345670123456701234567012345670")
- testformat("%#o", big, "0o12345670123456701234567012345670")
- testformat("%#o", -big, "-0o12345670123456701234567012345670")
- testformat("%#.34o", -big, "-0o0012345670123456701234567012345670")
- testformat("%#+.34o", big, "+0o0012345670123456701234567012345670")
- testformat("%# .34o", big, " 0o0012345670123456701234567012345670")
- testformat("%#+.34o", big, "+0o0012345670123456701234567012345670")
- testformat("%#-+.34o", big, "+0o0012345670123456701234567012345670")
- testformat("%#-+37.34o", big, "+0o0012345670123456701234567012345670")
- testformat("%#+37.34o", big, "+0o0012345670123456701234567012345670")
+ testcommon("%o", big, "12345670123456701234567012345670")
+ testcommon("%o", -big, "-12345670123456701234567012345670")
+ testcommon("%5o", -big, "-12345670123456701234567012345670")
+ testcommon("%33o", -big, "-12345670123456701234567012345670")
+ testcommon("%34o", -big, " -12345670123456701234567012345670")
+ testcommon("%-34o", -big, "-12345670123456701234567012345670 ")
+ testcommon("%034o", -big, "-012345670123456701234567012345670")
+ testcommon("%-034o", -big, "-12345670123456701234567012345670 ")
+ testcommon("%036o", -big, "-00012345670123456701234567012345670")
+ testcommon("%036o", big, "000012345670123456701234567012345670")
+ testcommon("%0+36o", big, "+00012345670123456701234567012345670")
+ testcommon("%+36o", big, " +12345670123456701234567012345670")
+ testcommon("%36o", big, " 12345670123456701234567012345670")
+ testcommon("%.2o", big, "12345670123456701234567012345670")
+ testcommon("%.32o", big, "12345670123456701234567012345670")
+ testcommon("%.33o", big, "012345670123456701234567012345670")
+ testcommon("%34.33o", big, " 012345670123456701234567012345670")
+ testcommon("%-34.33o", big, "012345670123456701234567012345670 ")
+ testcommon("%o", big, "12345670123456701234567012345670")
+ testcommon("%#o", big, "0o12345670123456701234567012345670")
+ testcommon("%#o", -big, "-0o12345670123456701234567012345670")
+ testcommon("%#.34o", -big, "-0o0012345670123456701234567012345670")
+ testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")
+ testcommon("%# .34o", big, " 0o0012345670123456701234567012345670")
+ testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")
+ testcommon("%#-+.34o", big, "+0o0012345670123456701234567012345670")
+ testcommon("%#-+37.34o", big, "+0o0012345670123456701234567012345670")
+ testcommon("%#+37.34o", big, "+0o0012345670123456701234567012345670")
# next one gets one leading zero from precision
- testformat("%.33o", big, "012345670123456701234567012345670")
+ testcommon("%.33o", big, "012345670123456701234567012345670")
# base marker shouldn't change that, since "0" is redundant
- testformat("%#.33o", big, "0o012345670123456701234567012345670")
+ testcommon("%#.33o", big, "0o012345670123456701234567012345670")
# but reduce precision, and base marker should add a zero
- testformat("%#.32o", big, "0o12345670123456701234567012345670")
+ testcommon("%#.32o", big, "0o12345670123456701234567012345670")
# one leading zero from precision, and another from "0" flag & width
- testformat("%034.33o", big, "0012345670123456701234567012345670")
+ testcommon("%034.33o", big, "0012345670123456701234567012345670")
# base marker shouldn't change that
- testformat("%0#34.33o", big, "0o012345670123456701234567012345670")
+ testcommon("%0#34.33o", big, "0o012345670123456701234567012345670")
# Some small ints, in both Python int and flavors).
- testformat("%d", 42, "42")
- testformat("%d", -42, "-42")
- testformat("%d", 42, "42")
- testformat("%d", -42, "-42")
- testformat("%d", 42.0, "42")
- testformat("%#x", 1, "0x1")
- testformat("%#x", 1, "0x1")
- testformat("%#X", 1, "0X1")
- testformat("%#X", 1, "0X1")
- testformat("%#o", 1, "0o1")
- testformat("%#o", 1, "0o1")
- testformat("%#o", 0, "0o0")
- testformat("%#o", 0, "0o0")
- testformat("%o", 0, "0")
- testformat("%o", 0, "0")
- testformat("%d", 0, "0")
- testformat("%d", 0, "0")
- testformat("%#x", 0, "0x0")
- testformat("%#x", 0, "0x0")
- testformat("%#X", 0, "0X0")
- testformat("%#X", 0, "0X0")
- testformat("%x", 0x42, "42")
- testformat("%x", -0x42, "-42")
- testformat("%x", 0x42, "42")
- testformat("%x", -0x42, "-42")
- testformat("%o", 0o42, "42")
- testformat("%o", -0o42, "-42")
- testformat("%o", 0o42, "42")
- testformat("%o", -0o42, "-42")
+ testcommon("%d", 42, "42")
+ testcommon("%d", -42, "-42")
+ testcommon("%d", 42, "42")
+ testcommon("%d", -42, "-42")
+ testcommon("%d", 42.0, "42")
+ testcommon("%#x", 1, "0x1")
+ testcommon("%#x", 1, "0x1")
+ testcommon("%#X", 1, "0X1")
+ testcommon("%#X", 1, "0X1")
+ testcommon("%#o", 1, "0o1")
+ testcommon("%#o", 1, "0o1")
+ testcommon("%#o", 0, "0o0")
+ testcommon("%#o", 0, "0o0")
+ testcommon("%o", 0, "0")
+ testcommon("%o", 0, "0")
+ testcommon("%d", 0, "0")
+ testcommon("%d", 0, "0")
+ testcommon("%#x", 0, "0x0")
+ testcommon("%#x", 0, "0x0")
+ testcommon("%#X", 0, "0X0")
+ testcommon("%#X", 0, "0X0")
+ testcommon("%x", 0x42, "42")
+ testcommon("%x", -0x42, "-42")
+ testcommon("%x", 0x42, "42")
+ testcommon("%x", -0x42, "-42")
+ testcommon("%o", 0o42, "42")
+ testcommon("%o", -0o42, "-42")
+ testcommon("%o", 0o42, "42")
+ testcommon("%o", -0o42, "-42")
+ # alternate float formatting
+ testcommon('%g', 1.1, '1.1')
+ testcommon('%#g', 1.1, '1.10000')
+
+ def test_str_format(self):
testformat("%r", "\u0378", "'\\u0378'") # non printable
testformat("%a", "\u0378", "'\\u0378'") # non printable
testformat("%r", "\u0374", "'\u0374'") # printable
testformat("%a", "\u0374", "'\\u0374'") # printable
- # alternate float formatting
- testformat('%g', 1.1, '1.1')
- testformat('%#g', 1.1, '1.10000')
-
- # Test exception for unknown format characters
+ # Test exception for unknown format characters, etc.
if verbose:
print('Testing exceptions')
def test_exc(formatstr, args, exception, excmsg):
@@ -247,8 +275,83 @@ class FormatTest(unittest.TestCase):
test_exc('%g', '1', TypeError, "a float is required")
test_exc('no format', '1', TypeError,
"not all arguments converted during string formatting")
- test_exc('no format', '1', TypeError,
- "not all arguments converted during string formatting")
+
+ if maxsize == 2**31-1:
+ # crashes 2.2.1 and earlier:
+ try:
+ "%*d"%(maxsize, -127)
+ except MemoryError:
+ pass
+ else:
+ raise TestFailed('"%*d"%(maxsize, -127) should fail')
+
+ def test_bytes_and_bytearray_format(self):
+ # %c will insert a single byte, either from an int in range(256), or
+ # from a bytes argument of length 1, not from a str.
+ testcommon(b"%c", 7, b"\x07")
+ testcommon(b"%c", b"Z", b"Z")
+ testcommon(b"%c", bytearray(b"Z"), b"Z")
+ # %b will insert a series of bytes, either from a type that supports
+ # the Py_buffer protocol, or something that has a __bytes__ method
+ class FakeBytes(object):
+ def __bytes__(self):
+ return b'123'
+ fb = FakeBytes()
+ testcommon(b"%b", b"abc", b"abc")
+ testcommon(b"%b", bytearray(b"def"), b"def")
+ testcommon(b"%b", fb, b"123")
+ # # %s is an alias for %b -- should only be used for Py2/3 code
+ testcommon(b"%s", b"abc", b"abc")
+ testcommon(b"%s", bytearray(b"def"), b"def")
+ testcommon(b"%s", fb, b"123")
+ # %a will give the equivalent of
+ # repr(some_obj).encode('ascii', 'backslashreplace')
+ testcommon(b"%a", 3.14, b"3.14")
+ testcommon(b"%a", b"ghi", b"b'ghi'")
+ testcommon(b"%a", "jkl", b"'jkl'")
+ testcommon(b"%a", "\u0544", b"'\\u0544'")
+
+ # Test exception for unknown format characters, etc.
+ if verbose:
+ print('Testing exceptions')
+ def test_exc(formatstr, args, exception, excmsg):
+ try:
+ testformat(formatstr, args)
+ except exception as exc:
+ if str(exc) == excmsg:
+ if verbose:
+ print("yes")
+ else:
+ if verbose: print('no')
+ print('Unexpected ', exception, ':', repr(str(exc)))
+ except:
+ if verbose: print('no')
+ print('Unexpected exception')
+ raise
+ else:
+ raise TestFailed('did not get expected exception: %s' % excmsg)
+ test_exc(b'%d', '1', TypeError,
+ "%d format: a number is required, not str")
+ test_exc(b'%d', b'1', TypeError,
+ "%d format: a number is required, not bytes")
+ test_exc(b'%g', '1', TypeError, "float argument required, not str")
+ test_exc(b'%g', b'1', TypeError, "float argument required, not bytes")
+ test_exc(b'no format', 7, TypeError,
+ "not all arguments converted during bytes formatting")
+ test_exc(b'no format', b'1', TypeError,
+ "not all arguments converted during bytes formatting")
+ test_exc(b'no format', bytearray(b'1'), TypeError,
+ "not all arguments converted during bytes formatting")
+ test_exc(b"%c", 256, TypeError,
+ "%c requires an integer in range(256) or a single byte")
+ test_exc(b"%c", b"Za", TypeError,
+ "%c requires an integer in range(256) or a single byte")
+ test_exc(b"%c", "Yb", TypeError,
+ "%c requires an integer in range(256) or a single byte")
+ test_exc(b"%b", "Xc", TypeError,
+ "%b requires bytes, or an object that implements __bytes__, not 'str'")
+ test_exc(b"%s", "Wd", TypeError,
+ "%b requires bytes, or an object that implements __bytes__, not 'str'")
if maxsize == 2**31-1:
# crashes 2.2.1 and earlier:
diff --git a/Misc/NEWS b/Misc/NEWS
index 91643f3..127a747 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -14,6 +14,9 @@ Core and Builtins
atomic memory access if available. Patch written by Vitor de Lima and Gustavo
Temple.
+- Issue #20284: %-interpolation (aka printf) formatting added for bytes and
+ bytearray.
+
- Issue #23048: Fix jumping out of an infinite while loop in the pdb.
- Issue #20335: bytes constructor now raises TypeError when encoding or errors
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 323c985..9ab60f2 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject *format_spec)
Py_DECREF(meth);
if (result && !PyUnicode_Check(result)) {
- PyErr_SetString(PyExc_TypeError,
- "__format__ method did not return string");
+ PyErr_Format(PyExc_TypeError,
+ "__format__ must return a str, not %.200s",
+ Py_TYPE(result)->tp_name);
Py_DECREF(result);
result = NULL;
goto done;
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 47d480f..b9a87d6 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -4,6 +4,7 @@
#include "Python.h"
#include "structmember.h"
#include "bytes_methods.h"
+#include "bytesobject.h"
/*[clinic input]
class bytearray "PyByteArrayObject *" "&PyByteArray_Type"
@@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
return (PyObject *)result;
}
+/* Implement ``bytearray % args``.
+ *
+ * The bytearray's contents are copied into a temporary bytes object, which
+ * is formatted by _PyBytes_Format(); the formatted bytes are then converted
+ * back into a new bytearray.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+bytearray_format(PyByteArrayObject *self, PyObject *args)
+{
+    PyObject *bytes_in, *bytes_out, *res;
+
+    if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    /* Copy by explicit length: a bytearray may contain embedded NUL bytes,
+       and a strlen()-based copy (PyBytes_FromString) would silently cut the
+       format string off at the first b'\0'. */
+    bytes_in = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(self),
+                                         PyByteArray_GET_SIZE(self));
+    if (bytes_in == NULL)
+        return NULL;
+    bytes_out = _PyBytes_Format(bytes_in, args);
+    Py_DECREF(bytes_in);
+    if (bytes_out == NULL)
+        return NULL;
+    /* res may be NULL here; it is returned as-is so the error propagates. */
+    res = PyByteArray_FromObject(bytes_out);
+    Py_DECREF(bytes_out);
+    return res;
+}
+
/* Functions stuffed into the type object */
static Py_ssize_t
@@ -3723,6 +3749,21 @@ bytearray_methods[] = {
{NULL}
};
+/* nb_remainder slot for bytearray: format v with w when v is a bytearray,
+   otherwise decline by returning NotImplemented. */
+static PyObject *
+bytearray_mod(PyObject *v, PyObject *w)
+{
+    if (PyByteArray_Check(v)) {
+        return bytearray_format((PyByteArrayObject *)v, w);
+    }
+    Py_RETURN_NOTIMPLEMENTED;
+}
+
+/* Number-protocol table for bytearray.  Of the slots listed here only
+   nb_remainder is populated; it points at bytearray_mod.  The remaining
+   listed slots are 0. */
+static PyNumberMethods bytearray_as_number = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ bytearray_mod, /*nb_remainder*/
+};
+
PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray\n\
bytearray(string, encoding[, errors]) -> bytearray\n\
@@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytearray_repr, /* tp_repr */
- 0, /* tp_as_number */
+ &bytearray_as_number, /* tp_as_number */
&bytearray_as_sequence, /* tp_as_sequence */
&bytearray_as_mapping, /* tp_as_mapping */
0, /* tp_hash */
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a5b9feb..bf919b5 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...)
return ret;
}
+/* Helpers for formatstring */
+
+/* Hand out the next value to format and advance *p_argidx.
+ *
+ * A negative arglen means args is a single value rather than a tuple, in
+ * which case args itself is returned; otherwise the item at the current
+ * index is fetched with PyTuple_GetItem().  When the index has reached
+ * arglen, TypeError is set and NULL is returned.
+ */
+Py_LOCAL_INLINE(PyObject *)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+{
+    Py_ssize_t idx = *p_argidx;
+
+    if (idx >= arglen) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not enough arguments for format string");
+        return NULL;
+    }
+    *p_argidx = idx + 1;
+    return (arglen < 0) ? args : PyTuple_GetItem(args, idx);
+}
+
+/* Format codes
+ * F_LJUST '-'
+ * F_SIGN '+'
+ * F_BLANK ' '
+ * F_ALT '#'
+ * F_ZERO '0'
+ */
+#define F_LJUST (1<<0)
+#define F_SIGN (1<<1)
+#define F_BLANK (1<<2)
+#define F_ALT (1<<3)
+#define F_ZERO (1<<4)
+
+/* Format v as a float for one of the e/E/f/F/g/G conversions.
+ *
+ * v is converted with PyFloat_AsDouble(); any failure there is reported as
+ * TypeError("float argument required, not <type>").  A negative prec
+ * selects the default precision of 6, and F_ALT in flags is passed on as
+ * Py_DTSF_ALT.
+ *
+ * Returns a new reference to a PyBytes object, or NULL on failure.
+ */
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+    PyObject *bytes;
+    char *text;
+    int dtoa_flags = 0;
+    double value = PyFloat_AsDouble(v);
+
+    if (value == -1.0 && PyErr_Occurred()) {
+        PyErr_Format(PyExc_TypeError, "float argument required, "
+                     "not %.200s", Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+    if (flags & F_ALT)
+        dtoa_flags = Py_DTSF_ALT;
+
+    text = PyOS_double_to_string(value, type, (prec < 0) ? 6 : prec,
+                                 dtoa_flags, NULL);
+    if (text == NULL)
+        return NULL;
+
+    /* The text buffer is released with PyMem_Free once copied into bytes. */
+    bytes = PyBytes_FromStringAndSize(text, strlen(text));
+    PyMem_Free(text);
+    return bytes;
+}
+
+/* format_long emulates the format codes d, u, o, x and X, and
+ * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
+ * Python's regular ints.
+ * Return value: a new PyBytes*, or NULL if error.
+ * On success, *pbuf is set to point into the returned object's buffer
+ * and *plen is set to the number of chars starting at *pbuf.
+ * Caller must decref the returned object when done using pbuf.
+ * The string starting at *pbuf is of the form
+ * "-"? ("0x" | "0X")? digit+
+ * "0x"/"0X" are present only for x and X conversions, with F_ALT
+ * set in flags. The case of hex digits will be correct,
+ * There will be at least prec digits, zero-filled on the left if
+ * necessary to get that many.
+ * val object to be converted
+ * flags bitmask of format flags; only F_ALT is looked at
+ * prec minimum number of digits; 0-fill on left if needed
+ * type a character in [duoxX]; u acts the same as d
+ *
+ * CAUTION: o, x and X conversions on regular ints can never
+ * produce a '-' sign, but can for Python's unbounded ints.
+ */
+
+static PyObject *
+format_long(PyObject *val, int flags, int prec, int type,
+            char **pbuf, int *plen)
+{
+    PyObject *ascii;
+    /* Only the F_ALT bit of flags is forwarded to the unicode formatter. */
+    PyObject *text = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
+
+    if (text == NULL)
+        return NULL;
+
+    /* Convert the str produced above into a bytes object. */
+    ascii = _PyUnicode_AsASCIIString(text, "strict");
+    Py_DECREF(text);
+    if (ascii != NULL) {
+        /* Outputs are written only on success. */
+        *pbuf = PyBytes_AS_STRING(ascii);
+        *plen = PyBytes_GET_SIZE(ascii);
+    }
+    return ascii;
+}
+
+/* Produce the single byte for a %c conversion.
+ *
+ * v is either a bytes/bytearray object (parsed with the "c" converter) or
+ * an integer in the range 0..255.  The byte is stored in buf[0] and a NUL
+ * in buf[1]; buflen is not consulted, so the caller must supply a buffer
+ * of at least 2 chars.
+ *
+ * Returns 1 (the number of bytes produced) on success, or -1 with an
+ * exception set.
+ */
+Py_LOCAL_INLINE(int)
+formatchar(char *buf, size_t buflen, PyObject *v)
+{
+    PyObject *converted = NULL;
+    int status = -1;
+
+    /* A bytearray is first copied into a temporary bytes object. */
+    if (PyByteArray_Check(v)) {
+        converted = PyBytes_FromObject(v);
+        if (converted == NULL)
+            return -1;
+        v = converted;
+    }
+
+    if (PyBytes_Check(v)) {
+        if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
+            goto done;
+    }
+    else {
+        long ival = PyLong_AsLong(v);
+
+        /* Conversion failures and out-of-range values are both reported
+           with the same TypeError. */
+        if ((ival == -1 && PyErr_Occurred()) || ival < 0 || ival > 255) {
+            PyErr_SetString(PyExc_TypeError,
+                            "%c requires an integer in range(256) or a single byte");
+            goto done;
+        }
+        buf[0] = ival;
+    }
+    buf[1] = '\0';
+    status = 1;
+
+  done:
+    Py_XDECREF(converted);
+    return status;
+}
+
+/* Obtain the bytes to interpolate for a %b / %s conversion.
+ *
+ *  - a bytearray is copied into a new bytes object;
+ *  - a bytes object is returned as-is (with a new reference);
+ *  - any other object must provide __bytes__, which has to return bytes.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+format_obj(PyObject *v)
+{
+    PyObject *method, *converted;
+    _Py_IDENTIFIER(__bytes__);
+
+    if (PyByteArray_Check(v))
+        return PyBytes_FromObject(v);
+
+    if (PyBytes_Check(v)) {
+        Py_INCREF(v);
+        return v;
+    }
+
+    method = _PyObject_LookupSpecial(v, &PyId___bytes__);
+    if (method == NULL) {
+        PyErr_Format(PyExc_TypeError,
+                     "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
+                     Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    converted = PyObject_CallFunctionObjArgs(method, NULL);
+    Py_DECREF(method);
+    if (converted != NULL && !PyBytes_Check(converted)) {
+        PyErr_Format(PyExc_TypeError,
+                     "__bytes__ returned non-bytes (type %.200s)",
+                     Py_TYPE(converted)->tp_name);
+        Py_DECREF(converted);
+        return NULL;
+    }
+    return converted;
+}
+
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
+
+ FORMATBUFLEN is the length of the buffer in which the ints &
+ chars are formatted. XXX This is a magic number. Each formatting
+ routine does bounds checking to ensure no overflow, but a better
+ solution may be to malloc a buffer of appropriate size for each
+ format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
+
+/* Implement ``format % args`` for bytes.
+ *
+ * format must be a bytes object.  args is a tuple of values, a single
+ * non-tuple value, or a mapping that is consulted for %(key)X specifiers.
+ * Returns a new bytes object, or NULL with an exception set.
+ *
+ * The output is assembled in an over-allocated bytes object that is grown
+ * on demand: reslen is its allocated size, rescnt the number of unused
+ * bytes left at its end, and res the write cursor.  Every failure path
+ * goes through the error label so that both the partial result and a
+ * value fetched from the mapping (args_owned) are released.
+ */
+PyObject *
+_PyBytes_Format(PyObject *format, PyObject *args)
+{
+    char *fmt, *res;
+    Py_ssize_t arglen, argidx;
+    Py_ssize_t reslen, rescnt, fmtcnt;
+    int args_owned = 0;
+    PyObject *result;
+    PyObject *dict = NULL;
+
+    if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    fmt = PyBytes_AS_STRING(format);
+    fmtcnt = PyBytes_GET_SIZE(format);
+    reslen = rescnt = fmtcnt + 100;
+    result = PyBytes_FromStringAndSize((char *)NULL, reslen);
+    if (result == NULL)
+        return NULL;
+    res = PyBytes_AsString(result);
+    if (PyTuple_Check(args)) {
+        arglen = PyTuple_GET_SIZE(args);
+        argidx = 0;
+    }
+    else {
+        /* Single non-tuple value: with these settings getnextarg()
+           hands out args itself exactly once. */
+        arglen = -1;
+        argidx = -2;
+    }
+    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
+        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
+        !PyByteArray_Check(args)) {
+        dict = args;
+    }
+    while (--fmtcnt >= 0) {
+        if (*fmt != '%') {
+            /* Literal byte: copy it through, growing the result if full. */
+            if (--rescnt < 0) {
+                rescnt = fmtcnt + 100;
+                reslen += rescnt;
+                /* A failed resize leaves result NULL; the error label
+                   copes with that and still releases an owned args. */
+                if (_PyBytes_Resize(&result, reslen))
+                    goto error;
+                res = PyBytes_AS_STRING(result)
+                    + reslen - rescnt;
+                --rescnt;
+            }
+            *res++ = *fmt++;
+        }
+        else {
+            /* Got a format specifier */
+            int flags = 0;
+            Py_ssize_t width = -1;
+            int prec = -1;
+            int c = '\0';
+            int fill;
+            int isnumok;
+            PyObject *v = NULL;
+            PyObject *temp = NULL;  /* owns the bytes behind pbuf, if any */
+            PyObject *repr;
+            char *pbuf;
+            int sign;
+            Py_ssize_t len;
+            char formatbuf[FORMATBUFLEN];
+                 /* For format{int,char}() */
+
+            fmt++;
+            if (*fmt == '(') {
+                char *keystart;
+                Py_ssize_t keylen;
+                PyObject *key;
+                int pcount = 1;
+
+                if (dict == NULL) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "format requires a mapping");
+                    goto error;
+                }
+                ++fmt;
+                --fmtcnt;
+                keystart = fmt;
+                /* Skip over balanced parentheses */
+                while (pcount > 0 && --fmtcnt >= 0) {
+                    if (*fmt == ')')
+                        --pcount;
+                    else if (*fmt == '(')
+                        ++pcount;
+                    fmt++;
+                }
+                keylen = fmt - keystart - 1;
+                if (fmtcnt < 0 || pcount > 0) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "incomplete format key");
+                    goto error;
+                }
+                key = PyBytes_FromStringAndSize(keystart,
+                                                keylen);
+                if (key == NULL)
+                    goto error;
+                /* Drop the value fetched for a previous %(key) before
+                   replacing args with the one for this key. */
+                if (args_owned) {
+                    Py_DECREF(args);
+                    args_owned = 0;
+                }
+                args = PyObject_GetItem(dict, key);
+                Py_DECREF(key);
+                if (args == NULL) {
+                    goto error;
+                }
+                args_owned = 1;
+                arglen = -1;
+                argidx = -2;
+            }
+            /* Flags */
+            while (--fmtcnt >= 0) {
+                switch (c = *fmt++) {
+                case '-': flags |= F_LJUST; continue;
+                case '+': flags |= F_SIGN; continue;
+                case ' ': flags |= F_BLANK; continue;
+                case '#': flags |= F_ALT; continue;
+                case '0': flags |= F_ZERO; continue;
+                }
+                break;
+            }
+            /* Width */
+            if (c == '*') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+                if (!PyLong_Check(v)) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "* wants int");
+                    goto error;
+                }
+                width = PyLong_AsSsize_t(v);
+                if (width == -1 && PyErr_Occurred())
+                    goto error;
+                if (width < 0) {
+                    /* -PY_SSIZE_T_MIN is not representable; negating it
+                       would be signed overflow. */
+                    if (width == PY_SSIZE_T_MIN) {
+                        PyErr_SetString(PyExc_ValueError,
+                                        "width too big");
+                        goto error;
+                    }
+                    flags |= F_LJUST;
+                    width = -width;
+                }
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+            }
+            else if (c >= 0 && isdigit(c)) {
+                width = c - '0';
+                while (--fmtcnt >= 0) {
+                    c = Py_CHARMASK(*fmt++);
+                    if (!isdigit(c))
+                        break;
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "width too big");
+                        goto error;
+                    }
+                    width = width*10 + (c - '0');
+                }
+            }
+            /* Precision */
+            if (c == '.') {
+                prec = 0;
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+                if (c == '*') {
+                    Py_ssize_t starprec;
+
+                    v = getnextarg(args, arglen, &argidx);
+                    if (v == NULL)
+                        goto error;
+                    if (!PyLong_Check(v)) {
+                        PyErr_SetString(
+                            PyExc_TypeError,
+                            "* wants int");
+                        goto error;
+                    }
+                    /* Range-check before narrowing to int so that a huge
+                       value is rejected instead of silently truncated. */
+                    starprec = PyLong_AsSsize_t(v);
+                    if (starprec == -1 && PyErr_Occurred())
+                        goto error;
+                    if (starprec > INT_MAX) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "prec too big");
+                        goto error;
+                    }
+                    prec = (starprec < 0) ? 0 : (int)starprec;
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+                else if (c >= 0 && isdigit(c)) {
+                    prec = c - '0';
+                    while (--fmtcnt >= 0) {
+                        c = Py_CHARMASK(*fmt++);
+                        if (!isdigit(c))
+                            break;
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
+                            PyErr_SetString(
+                                PyExc_ValueError,
+                                "prec too big");
+                            goto error;
+                        }
+                        prec = prec*10 + (c - '0');
+                    }
+                }
+            } /* prec */
+            /* A length modifier (h, l, L) is accepted and skipped. */
+            if (fmtcnt >= 0) {
+                if (c == 'h' || c == 'l' || c == 'L') {
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+            }
+            if (fmtcnt < 0) {
+                PyErr_SetString(PyExc_ValueError,
+                                "incomplete format");
+                goto error;
+            }
+            if (c != '%') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+            }
+            sign = 0;
+            fill = ' ';
+            switch (c) {
+            case '%':
+                pbuf = "%";
+                len = 1;
+                break;
+            case 'a':
+                /* repr(v).encode('ascii', 'backslashreplace'); the
+                   encoded bytes are owned by temp until the end of this
+                   specifier. */
+                repr = PyObject_Repr(v);
+                if (repr == NULL)
+                    goto error;
+                temp = PyUnicode_AsEncodedString(repr, "ascii",
+                                                 "backslashreplace");
+                Py_DECREF(repr);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                if (prec >= 0 && len > prec)
+                    len = prec;
+                break;
+            case 's':
+                /* %s is only for 2/3 code; 3 only code should use %b */
+            case 'b':
+                temp = format_obj(v);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                if (prec >= 0 && len > prec)
+                    len = prec;
+                break;
+            case 'i':
+            case 'd':
+            case 'u':
+            case 'o':
+            case 'x':
+            case 'X':
+                if (c == 'i')
+                    c = 'd';
+                isnumok = 0;
+                if (PyNumber_Check(v)) {
+                    PyObject *iobj=NULL;
+
+                    if ((PyLong_Check(v))) {
+                        iobj = v;
+                        Py_INCREF(iobj);
+                    }
+                    else {
+                        iobj = PyNumber_Long(v);
+                    }
+                    if (iobj!=NULL) {
+                        if (PyLong_Check(iobj)) {
+                            int ilen;
+
+                            isnumok = 1;
+                            temp = format_long(iobj, flags, prec, c,
+                                               &pbuf, &ilen);
+                            Py_DECREF(iobj);
+                            len = ilen;
+                            if (!temp)
+                                goto error;
+                            sign = 1;
+                        }
+                        else {
+                            Py_DECREF(iobj);
+                        }
+                    }
+                }
+                if (!isnumok) {
+                    PyErr_Format(PyExc_TypeError,
+                                 "%%%c format: a number is required, "
+                                 "not %.200s", c, Py_TYPE(v)->tp_name);
+                    goto error;
+                }
+                if (flags & F_ZERO)
+                    fill = '0';
+                break;
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'F':
+            case 'g':
+            case 'G':
+                temp = formatfloat(v, flags, prec, c);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                sign = 1;
+                if (flags & F_ZERO)
+                    fill = '0';
+                break;
+            case 'c':
+                pbuf = formatbuf;
+                len = formatchar(pbuf, sizeof(formatbuf), v);
+                if (len < 0)
+                    goto error;
+                break;
+            default:
+                PyErr_Format(PyExc_ValueError,
+                             "unsupported format character '%c' (0x%x) "
+                             "at index %zd",
+                             c, c,
+                             (Py_ssize_t)(fmt - 1 -
+                                          PyBytes_AsString(format)));
+                goto error;
+            }
+            /* For numeric conversions, split a leading sign off pbuf (or
+               pick the one requested by the '+' / ' ' flags). */
+            if (sign) {
+                if (*pbuf == '-' || *pbuf == '+') {
+                    sign = *pbuf++;
+                    len--;
+                }
+                else if (flags & F_SIGN)
+                    sign = '+';
+                else if (flags & F_BLANK)
+                    sign = ' ';
+                else
+                    sign = 0;
+            }
+            if (width < len)
+                width = len;
+            /* Make sure the field (plus an optional sign) fits. */
+            if (rescnt - (sign != 0) < width) {
+                reslen -= rescnt;
+                rescnt = width + fmtcnt + 100;
+                reslen += rescnt;
+                if (reslen < 0) {
+                    Py_XDECREF(temp);
+                    PyErr_NoMemory();
+                    goto error;
+                }
+                if (_PyBytes_Resize(&result, reslen)) {
+                    Py_XDECREF(temp);
+                    goto error;
+                }
+                res = PyBytes_AS_STRING(result)
+                    + reslen - rescnt;
+            }
+            /* With a non-space fill, the sign and the 0x/0X prefix are
+               emitted before the padding; with space fill, after it. */
+            if (sign) {
+                if (fill != ' ')
+                    *res++ = sign;
+                rescnt--;
+                if (width > len)
+                    width--;
+            }
+            if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+                assert(pbuf[0] == '0');
+                assert(pbuf[1] == c);
+                if (fill != ' ') {
+                    *res++ = *pbuf++;
+                    *res++ = *pbuf++;
+                }
+                rescnt -= 2;
+                width -= 2;
+                if (width < 0)
+                    width = 0;
+                len -= 2;
+            }
+            if (width > len && !(flags & F_LJUST)) {
+                do {
+                    --rescnt;
+                    *res++ = fill;
+                } while (--width > len);
+            }
+            if (fill == ' ') {
+                if (sign)
+                    *res++ = sign;
+                if ((flags & F_ALT) &&
+                    (c == 'x' || c == 'X')) {
+                    assert(pbuf[0] == '0');
+                    assert(pbuf[1] == c);
+                    *res++ = *pbuf++;
+                    *res++ = *pbuf++;
+                }
+            }
+            Py_MEMCPY(res, pbuf, len);
+            res += len;
+            rescnt -= len;
+            /* Right padding for left-justified fields. */
+            while (--width >= len) {
+                --rescnt;
+                *res++ = ' ';
+            }
+            if (dict && (argidx < arglen) && c != '%') {
+                PyErr_SetString(PyExc_TypeError,
+                                "not all arguments converted during bytes formatting");
+                Py_XDECREF(temp);
+                goto error;
+            }
+            Py_XDECREF(temp);
+        } /* '%' */
+    } /* until end */
+    if (argidx < arglen && !dict) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during bytes formatting");
+        goto error;
+    }
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    /* Trim the unused tail of the over-allocated result. */
+    if (_PyBytes_Resize(&result, reslen - rescnt))
+        return NULL;
+    return result;
+
+  error:
+    /* result is NULL here when a resize failed, hence XDECREF. */
+    Py_XDECREF(result);
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    return NULL;
+}
+
+/* =-= */
+
static void
bytes_dealloc(PyObject *op)
{
@@ -2996,6 +3624,21 @@ bytes_methods[] = {
};
static PyObject *
+bytes_mod(PyObject *v, PyObject *w)
+{
+    /* nb_remainder slot for bytes: only a bytes left operand is formatted;
+       anything else is declined with NotImplemented. */
+    if (PyBytes_Check(v)) {
+        return _PyBytes_Format(v, w);
+    }
+    Py_RETURN_NOTIMPLEMENTED;
+}
+
+/* Number-protocol table for bytes.  Of the slots listed here only
+   nb_remainder is populated; it points at bytes_mod.  The remaining
+   listed slots are 0. */
+static PyNumberMethods bytes_as_number = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ bytes_mod, /*nb_remainder*/
+};
+
+static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static PyObject *
@@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytes_repr, /* tp_repr */
- 0, /* tp_as_number */
+ &bytes_as_number, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */
(hashfunc)bytes_hash, /* tp_hash */
@@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
}
-/* The following function breaks the notion that strings are immutable:
- it changes the size of a string. We get away with this only if there
+/* The following function breaks the notion that bytes are immutable:
+ it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it
- as creating a new string object and destroying the old one, only
- more efficiently. In any case, don't use this if the string may
+ as creating a new bytes object and destroying the old one, only
+ more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code...
- Note that if there's not enough memory to resize the string, the original
- string object at *pv is deallocated, *pv is set to NULL, an "out of
+ Note that if there's not enough memory to resize the bytes object, the
+ original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is
returned, and the value in *pv may or may not be the same as on input.
As always, an extra byte is allocated for a trailing \0 byte (newsize
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3d51f34..ee0c044 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
* CAUTION: o, x and X conversions on regular ints can never
* produce a '-' sign, but can for Python's unbounded ints.
*/
-static PyObject*
-formatlong(PyObject *val, struct unicode_format_arg_t *arg)
+PyObject *
+_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
{
PyObject *result = NULL;
char *buf;
@@ -13904,8 +13904,6 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
Py_ssize_t llen;
int numdigits; /* len == numnondigits + numdigits */
int numnondigits = 0;
- int prec = arg->prec;
- int type = arg->ch;
/* Avoid exceeding SSIZE_T_MAX */
if (prec > INT_MAX-3) {
@@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
if (llen > INT_MAX) {
Py_DECREF(result);
PyErr_SetString(PyExc_ValueError,
- "string too large in _PyBytes_FormatLong");
+ "string too large in _PyUnicode_FormatLong");
return NULL;
}
len = (int)llen;
@@ -13964,7 +13962,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
assert(numdigits > 0);
/* Get rid of base marker unless F_ALT */
- if (((arg->flags & F_ALT) == 0 &&
+ if (((alt) == 0 &&
(type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v,
return 1;
}
- res = formatlong(iobj, arg);
+ res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
Py_DECREF(iobj);
if (res == NULL)
return -1;