diff options
author | Eric Smith <eric@trueblade.com> | 2008-02-17 19:46:49 (GMT) |
---|---|---|
committer | Eric Smith <eric@trueblade.com> | 2008-02-17 19:46:49 (GMT) |
commit | a9f7d6248032c9572b4d2024a1be8bd2823af09f (patch) | |
tree | 5465a1051312055678248db0076d314924ee4ebc | |
parent | e139688d34cc12b23d3a310f10d4f440f75f7d08 (diff) | |
download | cpython-a9f7d6248032c9572b4d2024a1be8bd2823af09f.zip cpython-a9f7d6248032c9572b4d2024a1be8bd2823af09f.tar.gz cpython-a9f7d6248032c9572b4d2024a1be8bd2823af09f.tar.bz2 |
Backport of PEP 3101, Advanced String Formatting, from py3k.
Highlights:
- Adding PyObject_Format.
- Adding string.Format class.
- Adding __format__ for str, unicode, int, long, float, datetime.
- Adding builtin format.
- Adding ''.format and u''.format.
- str/unicode fixups for formatters.
The files in Objects/stringlib that implement PEP 3101 (stringdefs.h,
unicodedefs.h, formatter.h, string_format.h) are identical in trunk
and py3k. Any changes from here on should be made to trunk, and
changes will propogate to py3k).
-rw-r--r-- | Include/abstract.h | 7 | ||||
-rw-r--r-- | Include/formatter_string.h | 12 | ||||
-rw-r--r-- | Include/formatter_unicode.h | 12 | ||||
-rw-r--r-- | Lib/string.py | 112 | ||||
-rw-r--r-- | Lib/test/test_builtin.py | 95 | ||||
-rw-r--r-- | Lib/test/test_datetime.py | 76 | ||||
-rw-r--r-- | Lib/test/test_descrtut.py | 1 | ||||
-rw-r--r-- | Lib/test/test_str.py | 258 | ||||
-rw-r--r-- | Lib/test/test_string.py | 86 | ||||
-rw-r--r-- | Lib/test/test_types.py | 251 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 262 | ||||
-rw-r--r-- | Makefile.pre.in | 16 | ||||
-rw-r--r-- | Modules/datetimemodule.c | 32 | ||||
-rw-r--r-- | Objects/abstract.c | 132 | ||||
-rw-r--r-- | Objects/floatobject.c | 43 | ||||
-rw-r--r-- | Objects/intobject.c | 36 | ||||
-rw-r--r-- | Objects/longobject.c | 36 | ||||
-rw-r--r-- | Objects/stringlib/formatter.h | 980 | ||||
-rw-r--r-- | Objects/stringlib/string_format.h | 1214 | ||||
-rw-r--r-- | Objects/stringlib/stringdefs.h | 27 | ||||
-rw-r--r-- | Objects/stringlib/unicodedefs.h | 52 | ||||
-rw-r--r-- | Objects/stringobject.c | 29 | ||||
-rw-r--r-- | Objects/typeobject.c | 46 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 34 | ||||
-rw-r--r-- | Python/bltinmodule.c | 19 | ||||
-rw-r--r-- | Python/formatter_string.c | 15 | ||||
-rw-r--r-- | Python/formatter_unicode.c | 13 |
27 files changed, 3873 insertions, 23 deletions
diff --git a/Include/abstract.h b/Include/abstract.h index a432a65..c61cc8b 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -529,6 +529,13 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ */ + PyAPI_FUNC(PyObject *) PyObject_Format(PyObject* obj, + PyObject *format_spec); + /* + Takes an arbitrary object and returns the result of + calling obj.__format__(format_spec). + */ + /* Iterators */ PyAPI_FUNC(PyObject *) PyObject_GetIter(PyObject *); diff --git a/Include/formatter_string.h b/Include/formatter_string.h new file mode 100644 index 0000000..14c4811 --- /dev/null +++ b/Include/formatter_string.h @@ -0,0 +1,12 @@ +PyObject * +string__format__(PyObject *self, PyObject *args); + +PyObject * +string_long__format__(PyObject *self, PyObject *args); + +PyObject * +string_int__format__(PyObject *self, PyObject *args); + +PyObject * +string_float__format__(PyObject *self, PyObject *args); + diff --git a/Include/formatter_unicode.h b/Include/formatter_unicode.h new file mode 100644 index 0000000..51406ab --- /dev/null +++ b/Include/formatter_unicode.h @@ -0,0 +1,12 @@ +PyObject * +unicode__format__(PyObject *self, PyObject *args); + +PyObject * +unicode_long__format__(PyObject *self, PyObject *args); + +PyObject * +unicode_int__format__(PyObject *self, PyObject *args); + +PyObject * +unicode_float__format__(PyObject *self, PyObject *args); + diff --git a/Lib/string.py b/Lib/string.py index 9e3cc41..eb680c2 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -527,3 +527,115 @@ try: letters = lowercase + uppercase except ImportError: pass # Use the original versions + +######################################################################## +# the Formatter class +# see PEP 3101 for details and purpose of this class + +# The hard parts are reused from the C implementation. They're +# exposed here via the sys module. sys was chosen because it's always +# available and doesn't have to be dynamically loaded. + +# The overall parser is implemented in str._formatter_parser. +# The field name parser is implemented in str._formatter_field_name_split + +class Formatter(object): + def format(self, format_string, *args, **kwargs): + return self.vformat(format_string, args, kwargs) + + def vformat(self, format_string, args, kwargs): + used_args = set() + result = self._vformat(format_string, args, kwargs, used_args, 2) + self.check_unused_args(used_args, args, kwargs) + return result + + def _vformat(self, format_string, args, kwargs, used_args, recursion_depth): + if recursion_depth < 0: + raise ValueError('Max string recursion exceeded') + result = [] + for literal_text, field_name, format_spec, conversion in \ + self.parse(format_string): + + # output the literal text + if literal_text: + result.append(literal_text) + + # if there's a field, output it + if field_name is not None: + # this is some markup, find the object and do + # the formatting + + # given the field_name, find the object it references + # and the argument it came from + obj, arg_used = self.get_field(field_name, args, kwargs) + used_args.add(arg_used) + + # do any conversion on the resulting object + obj = self.convert_field(obj, conversion) + + # expand the format spec, if needed + format_spec = self._vformat(format_spec, args, kwargs, + used_args, recursion_depth-1) + + # format the object and append to the result + result.append(self.format_field(obj, format_spec)) + + return ''.join(result) + + + def get_value(self, key, args, kwargs): + if isinstance(key, (int, long)): + return args[key] + else: + return kwargs[key] + + + def check_unused_args(self, used_args, args, kwargs): + pass + + + def format_field(self, value, format_spec): + return format(value, format_spec) + + + def convert_field(self, value, conversion): + # do any conversion on the resulting object + if conversion == 'r': + return repr(value) + elif conversion == 's': + return str(value) + elif conversion is None: + return value + raise ValueError("Unknown converion specifier {0!s}".format(conversion)) + + + # returns an iterable that contains tuples of the form: + # (literal_text, field_name, format_spec, conversion) + # literal_text can be zero length + # field_name can be None, in which case there's no + # object to format and output + # if field_name is not None, it is looked up, formatted + # with format_spec and conversion and then used + def parse(self, format_string): + return format_string._formatter_parser() + + + # given a field_name, find the object it references. + # field_name: the field being looked up, e.g. "0.name" + # or "lookup[3]" + # used_args: a set of which args have been used + # args, kwargs: as passed in to vformat + def get_field(self, field_name, args, kwargs): + first, rest = field_name._formatter_field_name_split() + + obj = self.get_value(first, args, kwargs) + + # loop through the rest of the field_name, doing + # getattr or getitem as needed + for is_attr, i in rest: + if is_attr: + obj = getattr(obj, i) + else: + obj = obj[i] + + return obj, first diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index ddc5842..a5d1f92 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2012,6 +2012,101 @@ class TestSorted(unittest.TestCase): data = 'The quick Brown fox Jumped over The lazy Dog'.split() self.assertRaises(TypeError, sorted, data, None, lambda x,y: 0) + def test_format(self): + # Test the basic machinery of the format() builtin. Don't test + # the specifics of the various formatters + self.assertEqual(format(3, ''), '3') + + # Returns some classes to use for various tests. There's + # an old-style version, and a new-style version + def classes_new(): + class A(object): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromA(A): + pass + + class Simple(object): pass + class DerivedFromSimple(Simple): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromSimple2(DerivedFromSimple): pass + return A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2 + + # In 3.0, classes_classic has the same meaning as classes_new + def classes_classic(): + class A: + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromA(A): + pass + + class Simple: pass + class DerivedFromSimple(Simple): + def __init__(self, x): + self.x = x + def __format__(self, format_spec): + return str(self.x) + format_spec + class DerivedFromSimple2(DerivedFromSimple): pass + return A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2 + + def class_test(A, DerivedFromA, DerivedFromSimple, DerivedFromSimple2): + self.assertEqual(format(A(3), 'spec'), '3spec') + self.assertEqual(format(DerivedFromA(4), 'spec'), '4spec') + self.assertEqual(format(DerivedFromSimple(5), 'abc'), '5abc') + self.assertEqual(format(DerivedFromSimple2(10), 'abcdef'), + '10abcdef') + + class_test(*classes_new()) + class_test(*classes_classic()) + + def empty_format_spec(value): + # test that: + # format(x, '') == str(x) + # format(x) == str(x) + self.assertEqual(format(value, ""), str(value)) + self.assertEqual(format(value), str(value)) + + # for builtin types, format(x, "") == str(x) + empty_format_spec(17**13) + empty_format_spec(1.0) + empty_format_spec(3.1415e104) + empty_format_spec(-3.1415e104) + empty_format_spec(3.1415e-104) + empty_format_spec(-3.1415e-104) + empty_format_spec(object) + empty_format_spec(None) + + # TypeError because self.__format__ returns the wrong type + class BadFormatResult: + def __format__(self, format_spec): + return 1.0 + self.assertRaises(TypeError, format, BadFormatResult(), "") + + # TypeError because format_spec is not unicode or str + self.assertRaises(TypeError, format, object(), 4) + self.assertRaises(TypeError, format, object(), object()) + + # tests for object.__format__ really belong elsewhere, but + # there's no good place to put them + x = object().__format__('') + self.assert_(x.startswith('<object object at')) + + # first argument to object.__format__ must be string + self.assertRaises(TypeError, object().__format__, 3) + self.assertRaises(TypeError, object().__format__, object()) + self.assertRaises(TypeError, object().__format__, None) + + # make sure we can take a subclass of str as a format spec + class DerivedFromStr(str): pass + self.assertEqual(format(0, DerivedFromStr('10')), ' 0') + def test_main(verbose=None): test_classes = (BuiltinTest, TestSorted) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index 21b8890..cb639b4 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -854,6 +854,32 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase): # A naive object replaces %z and %Z w/ empty strings. self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") + def test_format(self): + dt = self.theclass(2007, 9, 10) + self.assertEqual(dt.__format__(''), str(dt)) + + # check that a derived class's __str__() gets called + class A(self.theclass): + def __str__(self): + return 'A' + a = A(2007, 9, 10) + self.assertEqual(a.__format__(''), 'A') + + # check that a derived class's strftime gets called + class B(self.theclass): + def strftime(self, format_spec): + return 'B' + b = B(2007, 9, 10) + self.assertEqual(b.__format__(''), str(dt)) + + for fmt in ["m:%m d:%d y:%y", + "m:%m d:%d y:%y H:%H M:%M S:%S", + "%z %Z", + ]: + self.assertEqual(dt.__format__(fmt), dt.strftime(fmt)) + self.assertEqual(a.__format__(fmt), dt.strftime(fmt)) + self.assertEqual(b.__format__(fmt), 'B') + def test_resolution_info(self): self.assert_(isinstance(self.theclass.min, self.theclass)) self.assert_(isinstance(self.theclass.max, self.theclass)) @@ -1136,6 +1162,32 @@ class TestDateTime(TestDate): # str is ISO format with the separator forced to a blank. self.assertEqual(str(t), "0002-03-02 00:00:00") + def test_format(self): + dt = self.theclass(2007, 9, 10, 4, 5, 1, 123) + self.assertEqual(dt.__format__(''), str(dt)) + + # check that a derived class's __str__() gets called + class A(self.theclass): + def __str__(self): + return 'A' + a = A(2007, 9, 10, 4, 5, 1, 123) + self.assertEqual(a.__format__(''), 'A') + + # check that a derived class's strftime gets called + class B(self.theclass): + def strftime(self, format_spec): + return 'B' + b = B(2007, 9, 10, 4, 5, 1, 123) + self.assertEqual(b.__format__(''), str(dt)) + + for fmt in ["m:%m d:%d y:%y", + "m:%m d:%d y:%y H:%H M:%M S:%S", + "%z %Z", + ]: + self.assertEqual(dt.__format__(fmt), dt.strftime(fmt)) + self.assertEqual(a.__format__(fmt), dt.strftime(fmt)) + self.assertEqual(b.__format__(fmt), 'B') + def test_more_ctime(self): # Test fields that TestDate doesn't touch. import time @@ -1767,6 +1819,30 @@ class TestTime(HarmlessMixedComparison, unittest.TestCase): # A naive object replaces %z and %Z with empty strings. self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") + def test_format(self): + t = self.theclass(1, 2, 3, 4) + self.assertEqual(t.__format__(''), str(t)) + + # check that a derived class's __str__() gets called + class A(self.theclass): + def __str__(self): + return 'A' + a = A(1, 2, 3, 4) + self.assertEqual(a.__format__(''), 'A') + + # check that a derived class's strftime gets called + class B(self.theclass): + def strftime(self, format_spec): + return 'B' + b = B(1, 2, 3, 4) + self.assertEqual(b.__format__(''), str(t)) + + for fmt in ['%H %M %S', + ]: + self.assertEqual(t.__format__(fmt), t.strftime(fmt)) + self.assertEqual(a.__format__(fmt), t.strftime(fmt)) + self.assertEqual(b.__format__(fmt), 'B') + def test_str(self): self.assertEqual(str(self.theclass(1, 2, 3, 4)), "01:02:03.000004") self.assertEqual(str(self.theclass(10, 2, 3, 4000)), "10:02:03.004000") diff --git a/Lib/test/test_descrtut.py b/Lib/test/test_descrtut.py index 9dcfca1..94e9845 100644 --- a/Lib/test/test_descrtut.py +++ b/Lib/test/test_descrtut.py @@ -183,6 +183,7 @@ Instead, you can get the same information from the list type: '__delslice__', '__doc__', '__eq__', + '__format__', '__ge__', '__getattribute__', '__getitem__', diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 3acb6d1..7ea46bb 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -93,6 +93,264 @@ class StrTest( return self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint) + def test__format__(self): + def test(value, format, expected): + # test both with and without the trailing 's' + self.assertEqual(value.__format__(format), expected) + self.assertEqual(value.__format__(format + 's'), expected) + + test('', '', '') + test('abc', '', 'abc') + test('abc', '.3', 'abc') + test('ab', '.3', 'ab') + test('abcdef', '.3', 'abc') + test('abcdef', '.0', '') + test('abc', '3.3', 'abc') + test('abc', '2.3', 'abc') + test('abc', '2.2', 'ab') + test('abc', '3.2', 'ab ') + test('result', 'x<0', 'result') + test('result', 'x<5', 'result') + test('result', 'x<6', 'result') + test('result', 'x<7', 'resultx') + test('result', 'x<8', 'resultxx') + test('result', ' <7', 'result ') + test('result', '<7', 'result ') + test('result', '>7', ' result') + test('result', '>8', ' result') + test('result', '^8', ' result ') + test('result', '^9', ' result ') + test('result', '^10', ' result ') + test('a', '10000', 'a' + ' ' * 9999) + test('', '10000', ' ' * 10000) + test('', '10000000', ' ' * 10000000) + + def test_format(self): + self.assertEqual(''.format(), '') + self.assertEqual('a'.format(), 'a') + self.assertEqual('ab'.format(), 'ab') + self.assertEqual('a{{'.format(), 'a{') + self.assertEqual('a}}'.format(), 'a}') + self.assertEqual('{{b'.format(), '{b') + self.assertEqual('}}b'.format(), '}b') + self.assertEqual('a{{b'.format(), 'a{b') + + # examples from the PEP: + import datetime + self.assertEqual("My name is {0}".format('Fred'), "My name is Fred") + self.assertEqual("My name is {0[name]}".format(dict(name='Fred')), + "My name is Fred") + self.assertEqual("My name is {0} :-{{}}".format('Fred'), + "My name is Fred :-{}") + + d = datetime.date(2007, 8, 18) + self.assertEqual("The year is {0.year}".format(d), + "The year is 2007") + + # classes we'll use for testing + class C: + def __init__(self, x=100): + self._x = x + def __format__(self, spec): + return spec + + class D: + def __init__(self, x): + self.x = x + def __format__(self, spec): + return str(self.x) + + # class with __str__, but no __format__ + class E: + def __init__(self, x): + self.x = x + def __str__(self): + return 'E(' + self.x + ')' + + # class with __repr__, but no __format__ or __str__ + class F: + def __init__(self, x): + self.x = x + def __repr__(self): + return 'F(' + self.x + ')' + + # class with __format__ that forwards to string, for some format_spec's + class G: + def __init__(self, x): + self.x = x + def __str__(self): + return "string is " + self.x + def __format__(self, format_spec): + if format_spec == 'd': + return 'G(' + self.x + ')' + return object.__format__(self, format_spec) + + # class that returns a bad type from __format__ + class H: + def __format__(self, format_spec): + return 1.0 + + class I(datetime.date): + def __format__(self, format_spec): + return self.strftime(format_spec) + + class J(int): + def __format__(self, format_spec): + return int.__format__(self * 2, format_spec) + + + self.assertEqual(''.format(), '') + self.assertEqual('abc'.format(), 'abc') + self.assertEqual('{0}'.format('abc'), 'abc') + self.assertEqual('{0:}'.format('abc'), 'abc') + self.assertEqual('X{0}'.format('abc'), 'Xabc') + self.assertEqual('{0}X'.format('abc'), 'abcX') + self.assertEqual('X{0}Y'.format('abc'), 'XabcY') + self.assertEqual('{1}'.format(1, 'abc'), 'abc') + self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc') + self.assertEqual('{1}X'.format(1, 'abc'), 'abcX') + self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY') + self.assertEqual('{0}'.format(-15), '-15') + self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc') + self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc') + self.assertEqual('{{'.format(), '{') + self.assertEqual('}}'.format(), '}') + self.assertEqual('{{}}'.format(), '{}') + self.assertEqual('{{x}}'.format(), '{x}') + self.assertEqual('{{{0}}}'.format(123), '{123}') + self.assertEqual('{{{{0}}}}'.format(), '{{0}}') + self.assertEqual('}}{{'.format(), '}{') + self.assertEqual('}}x{{'.format(), '}x{') + + # weird field names + self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz') + self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz') + self.assertEqual("{0[ ]}".format({' ':3}), '3') + + self.assertEqual('{foo._x}'.format(foo=C(20)), '20') + self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010') + self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc') + self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc') + self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def') + self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def') + self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def') + + # strings + self.assertEqual('{0:.3s}'.format('abc'), 'abc') + self.assertEqual('{0:.3s}'.format('ab'), 'ab') + self.assertEqual('{0:.3s}'.format('abcdef'), 'abc') + self.assertEqual('{0:.0s}'.format('abcdef'), '') + self.assertEqual('{0:3.3s}'.format('abc'), 'abc') + self.assertEqual('{0:2.3s}'.format('abc'), 'abc') + self.assertEqual('{0:2.2s}'.format('abc'), 'ab') + self.assertEqual('{0:3.2s}'.format('abc'), 'ab ') + self.assertEqual('{0:x<0s}'.format('result'), 'result') + self.assertEqual('{0:x<5s}'.format('result'), 'result') + self.assertEqual('{0:x<6s}'.format('result'), 'result') + self.assertEqual('{0:x<7s}'.format('result'), 'resultx') + self.assertEqual('{0:x<8s}'.format('result'), 'resultxx') + self.assertEqual('{0: <7s}'.format('result'), 'result ') + self.assertEqual('{0:<7s}'.format('result'), 'result ') + self.assertEqual('{0:>7s}'.format('result'), ' result') + self.assertEqual('{0:>8s}'.format('result'), ' result') + self.assertEqual('{0:^8s}'.format('result'), ' result ') + self.assertEqual('{0:^9s}'.format('result'), ' result ') + self.assertEqual('{0:^10s}'.format('result'), ' result ') + self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999) + self.assertEqual('{0:10000}'.format(''), ' ' * 10000) + self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000) + + # format specifiers for user defined type + self.assertEqual('{0:abc}'.format(C()), 'abc') + + # !r and !s coersions + self.assertEqual('{0!s}'.format('Hello'), 'Hello') + self.assertEqual('{0!s:}'.format('Hello'), 'Hello') + self.assertEqual('{0!s:15}'.format('Hello'), 'Hello ') + self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello ') + self.assertEqual('{0!r}'.format('Hello'), "'Hello'") + self.assertEqual('{0!r:}'.format('Hello'), "'Hello'") + self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)') + + # test fallback to object.__format__ + self.assertEqual('{0}'.format({}), '{}') + self.assertEqual('{0}'.format([]), '[]') + self.assertEqual('{0}'.format([1]), '[1]') + self.assertEqual('{0}'.format(E('data')), 'E(data)') + self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ') + self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ') + self.assertEqual('{0:d}'.format(G('data')), 'G(data)') + self.assertEqual('{0:>15s}'.format(G('data')), ' string is data') + self.assertEqual('{0!s}'.format(G('data')), 'string is data') + + self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007, + month=8, + day=27)), + "date: 2007-08-27") + + # test deriving from a builtin type and overriding __format__ + self.assertEqual("{0}".format(J(10)), "20") + + + # string format specifiers + self.assertEqual('{0:}'.format('a'), 'a') + + # computed format specifiers + self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello') + self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello') + self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello') + self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ') + self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ') + + # test various errors + self.assertRaises(ValueError, '{'.format) + self.assertRaises(ValueError, '}'.format) + self.assertRaises(ValueError, 'a{'.format) + self.assertRaises(ValueError, 'a}'.format) + self.assertRaises(ValueError, '{a'.format) + self.assertRaises(ValueError, '}a'.format) + self.assertRaises(IndexError, '{0}'.format) + self.assertRaises(IndexError, '{1}'.format, 'abc') + self.assertRaises(KeyError, '{x}'.format) + self.assertRaises(ValueError, "}{".format) + self.assertRaises(ValueError, "{".format) + self.assertRaises(ValueError, "}".format) + self.assertRaises(ValueError, "abc{0:{}".format) + self.assertRaises(ValueError, "{0".format) + self.assertRaises(IndexError, "{0.}".format) + self.assertRaises(ValueError, "{0.}".format, 0) + self.assertRaises(IndexError, "{0[}".format) + self.assertRaises(ValueError, "{0[}".format, []) + self.assertRaises(KeyError, "{0]}".format) + self.assertRaises(ValueError, "{0.[]}".format, 0) + self.assertRaises(ValueError, "{0..foo}".format, 0) + self.assertRaises(ValueError, "{0[0}".format, 0) + self.assertRaises(ValueError, "{0[0:foo}".format, 0) + self.assertRaises(KeyError, "{c]}".format) + self.assertRaises(ValueError, "{{ {{{0}}".format, 0) + self.assertRaises(ValueError, "{0}}".format, 0) + self.assertRaises(KeyError, "{foo}".format, bar=3) + self.assertRaises(ValueError, "{0!x}".format, 3) + self.assertRaises(ValueError, "{0!}".format, 0) + self.assertRaises(ValueError, "{0!rs}".format, 0) + self.assertRaises(ValueError, "{!}".format) + self.assertRaises(ValueError, "{:}".format) + self.assertRaises(ValueError, "{:s}".format) + self.assertRaises(ValueError, "{}".format) + + # can't have a replacement on the field name portion + self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4) + + # exceed maximum recursion depth + self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '') + self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format, + 0, 1, 2, 3, 4, 5, 6, 7) + + # string format spec errors + self.assertRaises(ValueError, "{0:-s}".format, '') + self.assertRaises(ValueError, format, "", "-") + self.assertRaises(ValueError, "{0:=s}".format, '') + def test_main(): test_support.run_unittest(StrTest) diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py index 9b22333..5c20a2b 100644 --- a/Lib/test/test_string.py +++ b/Lib/test/test_string.py @@ -106,6 +106,92 @@ class ModuleTest(unittest.TestCase): self.assertEqual(string.capwords('ABC-DEF-GHI', '-'), 'Abc-Def-Ghi') self.assertEqual(string.capwords('ABC-def DEF-ghi GHI'), 'Abc-def Def-ghi Ghi') + def test_formatter(self): + fmt = string.Formatter() + self.assertEqual(fmt.format("foo"), "foo") + + self.assertEqual(fmt.format("foo{0}", "bar"), "foobar") + self.assertEqual(fmt.format("foo{1}{0}-{1}", "bar", 6), "foo6bar-6") + self.assertEqual(fmt.format("-{arg!r}-", arg='test'), "-'test'-") + + # override get_value ############################################ + class NamespaceFormatter(string.Formatter): + def __init__(self, namespace={}): + string.Formatter.__init__(self) + self.namespace = namespace + + def get_value(self, key, args, kwds): + if isinstance(key, str): + try: + # Check explicitly passed arguments first + return kwds[key] + except KeyError: + return self.namespace[key] + else: + string.Formatter.get_value(key, args, kwds) + + fmt = NamespaceFormatter({'greeting':'hello'}) + self.assertEqual(fmt.format("{greeting}, world!"), 'hello, world!') + + + # override format_field ######################################### + class CallFormatter(string.Formatter): + def format_field(self, value, format_spec): + return format(value(), format_spec) + + fmt = CallFormatter() + self.assertEqual(fmt.format('*{0}*', lambda : 'result'), '*result*') + + + # override convert_field ######################################## + class XFormatter(string.Formatter): + def convert_field(self, value, conversion): + if conversion == 'x': + return None + return super(XFormatter, self).convert_field(value, conversion) + + fmt = XFormatter() + self.assertEqual(fmt.format("{0!r}:{0!x}", 'foo', 'foo'), "'foo':None") + + + # override parse ################################################ + class BarFormatter(string.Formatter): + # returns an iterable that contains tuples of the form: + # (literal_text, field_name, format_spec, conversion) + def parse(self, format_string): + for field in format_string.split('|'): + if field[0] == '+': + # it's markup + field_name, _, format_spec = field[1:].partition(':') + yield '', field_name, format_spec, None + else: + yield field, None, None, None + + fmt = BarFormatter() + self.assertEqual(fmt.format('*|+0:^10s|*', 'foo'), '* foo *') + + # test all parameters used + class CheckAllUsedFormatter(string.Formatter): + def check_unused_args(self, used_args, args, kwargs): + # Track which arguments actuallly got used + unused_args = set(kwargs.keys()) + unused_args.update(range(0, len(args))) + + for arg in used_args: + unused_args.remove(arg) + + if unused_args: + raise ValueError("unused arguments") + + fmt = CheckAllUsedFormatter() + self.assertEqual(fmt.format("{0}", 10), "10") + self.assertEqual(fmt.format("{0}{i}", 10, i=100), "10100") + self.assertEqual(fmt.format("{0}{i}{1}", 10, 20, i=100), "1010020") + self.assertRaises(ValueError, fmt.format, "{0}{i}{1}", 10, 20, i=100, j=0) + self.assertRaises(ValueError, fmt.format, "{0}", 10, 20) + self.assertRaises(ValueError, fmt.format, "{0}", 10, 20, i=100) + self.assertRaises(ValueError, fmt.format, "{i}", 10, 20, i=100) + class BytesAliasTest(unittest.TestCase): def test_builtin(self): diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index bded0d7..ae1bfe1 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -266,6 +266,257 @@ class TypesTests(unittest.TestCase): except TypeError: pass else: self.fail("char buffer (at C level) not working") + def test_int__format__(self): + def test(i, format_spec, result): + # just make sure I'm not accidentally checking longs + assert type(i) == int + assert type(format_spec) == str + self.assertEqual(i.__format__(format_spec), result) + self.assertEqual(i.__format__(unicode(format_spec)), result) + + test(123456789, 'd', '123456789') + test(123456789, 'd', '123456789') + + test(1, 'c', '\01') + + # sign and aligning are interdependent + test(1, "-", '1') + test(-1, "-", '-1') + test(1, "-3", ' 1') + test(-1, "-3", ' -1') + test(1, "+3", ' +1') + test(-1, "+3", ' -1') + test(1, " 3", ' 1') + test(-1, " 3", ' -1') + test(1, " ", ' 1') + test(-1, " ", '-1') + + # hex + test(3, "x", "3") + test(3, "X", "3") + test(1234, "x", "4d2") + test(-1234, "x", "-4d2") + test(1234, "8x", " 4d2") + test(-1234, "8x", " -4d2") + test(1234, "x", "4d2") + test(-1234, "x", "-4d2") + test(-3, "x", "-3") + test(-3, "X", "-3") + test(int('be', 16), "x", "be") + test(int('be', 16), "X", "BE") + test(-int('be', 16), "x", "-be") + test(-int('be', 16), "X", "-BE") + + # octal + test(3, "o", "3") + test(-3, "o", "-3") + test(65, "o", "101") + test(-65, "o", "-101") + test(1234, "o", "2322") + test(-1234, "o", "-2322") + test(1234, "-o", "2322") + test(-1234, "-o", "-2322") + test(1234, " o", " 2322") + test(-1234, " o", "-2322") + test(1234, "+o", "+2322") + test(-1234, "+o", "-2322") + + # binary + test(3, "b", "11") + test(-3, "b", "-11") + test(1234, "b", "10011010010") + test(-1234, "b", "-10011010010") + test(1234, "-b", "10011010010") + test(-1234, "-b", "-10011010010") + test(1234, " b", " 10011010010") + test(-1234, " b", "-10011010010") + test(1234, "+b", "+10011010010") + test(-1234, "+b", "-10011010010") + + # make sure these are errors + + # precision disallowed + self.assertRaises(ValueError, 3 .__format__, "1.3") + # sign not allowed with 'c' + self.assertRaises(ValueError, 3 .__format__, "+c") + # format spec must be string + self.assertRaises(TypeError, 3 .__format__, None) + self.assertRaises(TypeError, 3 .__format__, 0) + + # ensure that only int and float type specifiers work + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + + [chr(x) for x in range(ord('A'), ord('Z')+1)]): + if not format_spec in 'bcdoxXeEfFgGn%': + self.assertRaises(ValueError, 0 .__format__, format_spec) + self.assertRaises(ValueError, 1 .__format__, format_spec) + self.assertRaises(ValueError, (-1) .__format__, format_spec) + + # ensure that float type specifiers work; format converts + # the int to a float + for format_spec in 'eEfFgGn%': + for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]: + self.assertEqual(value.__format__(format_spec), + float(value).__format__(format_spec)) + + def test_long__format__(self): + def test(i, format_spec, result): + # make sure we're not accidentally checking ints + assert type(i) == long + assert type(format_spec) == str + self.assertEqual(i.__format__(format_spec), result) + self.assertEqual(i.__format__(unicode(format_spec)), result) + + test(10**100, 'd', '1' + '0' * 100) + test(10**100+100, 'd', '1' + '0' * 97 + '100') + + test(123456789L, 'd', '123456789') + test(123456789L, 'd', '123456789') + + # sign and aligning are interdependent + test(1L, "-", '1') + test(-1L, "-", '-1') + test(1L, "-3", ' 1') + test(-1L, "-3", ' -1') + test(1L, "+3", ' +1') + test(-1L, "+3", ' -1') + test(1L, " 3", ' 1') + test(-1L, " 3", ' -1') + test(1L, " ", ' 1') + test(-1L, " ", '-1') + + test(1L, 'c', '\01') + + # hex + test(3L, "x", "3") + test(3L, "X", "3") + test(1234L, "x", "4d2") + test(-1234L, "x", "-4d2") + test(1234L, "8x", " 4d2") + test(-1234L, "8x", " -4d2") + test(1234L, "x", "4d2") + test(-1234L, "x", "-4d2") + test(-3L, "x", "-3") + test(-3L, "X", "-3") + test(long('be', 16), "x", "be") + test(long('be', 16), "X", "BE") + test(-long('be', 16), "x", "-be") + test(-long('be', 16), "X", "-BE") + + # octal + test(3L, "o", "3") + test(-3L, "o", "-3") + test(65L, "o", "101") + test(-65L, "o", "-101") + test(1234L, "o", "2322") + test(-1234L, "o", "-2322") + test(1234L, "-o", "2322") + test(-1234L, "-o", "-2322") + test(1234L, " o", " 2322") + test(-1234L, " o", "-2322") + test(1234L, "+o", "+2322") + test(-1234L, "+o", "-2322") + + # binary + test(3L, "b", "11") + test(-3L, "b", "-11") + test(1234L, "b", "10011010010") + test(-1234L, "b", "-10011010010") + test(1234L, "-b", "10011010010") + test(-1234L, "-b", "-10011010010") + test(1234L, " b", " 10011010010") + test(-1234L, " b", "-10011010010") + test(1234L, "+b", "+10011010010") + test(-1234L, "+b", "-10011010010") + + # make sure these are errors + + # precision disallowed + self.assertRaises(ValueError, 3L .__format__, "1.3") + # sign not allowed with 'c' + self.assertRaises(ValueError, 3L .__format__, "+c") + # format spec must be string + self.assertRaises(TypeError, 3L .__format__, None) + self.assertRaises(TypeError, 3L .__format__, 0) + + # ensure that only int and float type specifiers work + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + + [chr(x) for x in range(ord('A'), ord('Z')+1)]): + if not format_spec in 'bcdoxXeEfFgGn%': + self.assertRaises(ValueError, 0L .__format__, format_spec) + self.assertRaises(ValueError, 1L .__format__, format_spec) + self.assertRaises(ValueError, (-1L) .__format__, format_spec) + + # ensure that float type specifiers work; format converts + # the long to a float + for format_spec in 'eEfFgGn%': + for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]: + self.assertEqual(value.__format__(format_spec), + float(value).__format__(format_spec)) + + def test_float__format__(self): + # these should be rewritten to use both format(x, spec) and + # x.__format__(spec) + + def test(f, format_spec, result): + assert type(f) == float + assert type(format_spec) == str + self.assertEqual(f.__format__(format_spec), result) + self.assertEqual(f.__format__(unicode(format_spec)), result) + + test(0.0, 'f', '0.000000') + + # the default is 'g', except for empty format spec + test(0.0, '', '0.0') + test(0.01, '', '0.01') + test(0.01, 'g', '0.01') + + test( 1.0, ' g', ' 1') + test(-1.0, ' g', '-1') + test( 1.0, '+g', '+1') + test(-1.0, '+g', '-1') + test(1.1234e200, 'g', '1.1234e+200') + test(1.1234e200, 'G', '1.1234E+200') + + + test(1.0, 'f', '1.000000') + + test(-1.0, 'f', '-1.000000') + + test( 1.0, ' f', ' 1.000000') + test(-1.0, ' f', '-1.000000') + test( 1.0, '+f', '+1.000000') + test(-1.0, '+f', '-1.000000') + test(1.1234e200, 'f', '1.1234e+200') + test(1.1234e200, 'F', '1.1234e+200') + + test( 1.0, 'e', '1.000000e+00') + test(-1.0, 'e', '-1.000000e+00') + test( 1.0, 'E', '1.000000E+00') + test(-1.0, 'E', '-1.000000E+00') + test(1.1234e20, 'e', '1.123400e+20') + test(1.1234e20, 'E', '1.123400E+20') + + # % formatting + test(-1.0, '%', '-100.000000%') + + # format spec must be string + self.assertRaises(TypeError, 3.0.__format__, None) + self.assertRaises(TypeError, 3.0.__format__, 0) + + # other format specifiers shouldn't work on floats, + # in particular int specifiers + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + + [chr(x) for x in range(ord('A'), ord('Z')+1)]): + if not format_spec in 'eEfFgGn%': + self.assertRaises(ValueError, format, 0.0, format_spec) + self.assertRaises(ValueError, format, 1.0, format_spec) + self.assertRaises(ValueError, format, -1.0, format_spec) + self.assertRaises(ValueError, format, 1e100, format_spec) + self.assertRaises(ValueError, format, -1e100, format_spec) + self.assertRaises(ValueError, format, 1e-100, format_spec) + self.assertRaises(ValueError, format, -1e-100, format_spec) + + def test_main(): run_unittest(TypesTests) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index ef6a722..26a57cc 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -825,6 +825,268 @@ class UnicodeTest( return self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint) + def test__format__(self): + def test(value, format, expected): + # test both with and without the trailing 's' + self.assertEqual(value.__format__(format), expected) + self.assertEqual(value.__format__(format + u's'), expected) + + test(u'', u'', u'') + test(u'abc', u'', u'abc') + test(u'abc', u'.3', u'abc') + test(u'ab', u'.3', u'ab') + test(u'abcdef', u'.3', u'abc') + test(u'abcdef', u'.0', u'') + test(u'abc', u'3.3', u'abc') + test(u'abc', u'2.3', u'abc') + test(u'abc', u'2.2', u'ab') + test(u'abc', u'3.2', u'ab ') + test(u'result', u'x<0', u'result') + test(u'result', u'x<5', u'result') + test(u'result', u'x<6', u'result') + test(u'result', u'x<7', u'resultx') + test(u'result', u'x<8', u'resultxx') + test(u'result', u' <7', u'result ') + test(u'result', u'<7', u'result ') + test(u'result', u'>7', u' result') + test(u'result', u'>8', u' result') + test(u'result', u'^8', u' result ') + test(u'result', u'^9', u' result ') + test(u'result', u'^10', u' result ') + test(u'a', u'10000', u'a' + u' ' * 9999) + test(u'', u'10000', u' ' * 10000) + test(u'', u'10000000', u' ' * 10000000) + + # test mixing unicode and str + self.assertEqual(u'abc'.__format__('s'), u'abc') + self.assertEqual(u'abc'.__format__('->10s'), u'-------abc') + + def test_format(self): + self.assertEqual(u''.format(), u'') + self.assertEqual(u'a'.format(), u'a') + self.assertEqual(u'ab'.format(), u'ab') + self.assertEqual(u'a{{'.format(), u'a{') + self.assertEqual(u'a}}'.format(), u'a}') + self.assertEqual(u'{{b'.format(), u'{b') + self.assertEqual(u'}}b'.format(), u'}b') + self.assertEqual(u'a{{b'.format(), u'a{b') + + # examples from the PEP: + import datetime + self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred") + self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')), + u"My name is Fred") + self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'), + u"My name is Fred :-{}") + + # datetime.__format__ doesn't work with unicode + #d = datetime.date(2007, 8, 18) + #self.assertEqual("The year is {0.year}".format(d), + # "The year is 2007") + + # classes we'll use for testing + class C: + def __init__(self, x=100): + self._x = x + def __format__(self, spec): + return spec + + class D: + def __init__(self, x): + self.x = x + def __format__(self, spec): + return str(self.x) + + # class with __str__, but no __format__ + class E: + def __init__(self, x): + self.x = x + def __str__(self): + return u'E(' + self.x + u')' + + # class with __repr__, but no __format__ or __str__ + class F: + def __init__(self, x): + self.x = x + def __repr__(self): + return u'F(' + self.x + u')' + + # class with __format__ that forwards to string, for some format_spec's + class G: + def __init__(self, x): + self.x = x + def __str__(self): + return u"string is " + self.x + def __format__(self, format_spec): + if format_spec == 'd': + return u'G(' + self.x + u')' + return object.__format__(self, format_spec) + + # class that returns a bad type from __format__ + class H: + def __format__(self, format_spec): + return 1.0 + + class I(datetime.date): + def __format__(self, format_spec): + return self.strftime(format_spec) + + class J(int): + def __format__(self, format_spec): + return int.__format__(self * 2, format_spec) + + + self.assertEqual(u''.format(), u'') + self.assertEqual(u'abc'.format(), u'abc') + self.assertEqual(u'{0}'.format(u'abc'), u'abc') + self.assertEqual(u'{0:}'.format(u'abc'), u'abc') + self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc') + self.assertEqual(u'{0}X'.format(u'abc'), u'abcX') + self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY') + self.assertEqual(u'{1}'.format(1, u'abc'), u'abc') + self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc') + self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX') + self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY') + self.assertEqual(u'{0}'.format(-15), u'-15') + self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc') + self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc') + self.assertEqual(u'{{'.format(), u'{') + self.assertEqual(u'}}'.format(), u'}') + self.assertEqual(u'{{}}'.format(), u'{}') + self.assertEqual(u'{{x}}'.format(), u'{x}') + self.assertEqual(u'{{{0}}}'.format(123), u'{123}') + self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}') + self.assertEqual(u'}}{{'.format(), u'}{') + self.assertEqual(u'}}x{{'.format(), u'}x{') + + # weird field names + self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz') + self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz') + self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3') + + self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20') + self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010') + self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc') + self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc') + self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def') + self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def') + self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def') + + # strings + self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc') + self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab') + self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc') + self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'') + self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc') + self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc') + self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab') + self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ') + self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result') + self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result') + self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result') + self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx') + self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx') + self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ') + self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ') + self.assertEqual(u'{0:>7s}'.format(u'result'), u' result') + self.assertEqual(u'{0:>8s}'.format(u'result'), u' result') + self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ') + self.assertEqual(u'{0:^9s}'.format(u'result'), u' result ') + self.assertEqual(u'{0:^10s}'.format(u'result'), u' result ') + self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999) + self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000) + self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000) + + # format specifiers for user defined type + self.assertEqual(u'{0:abc}'.format(C()), u'abc') + + # !r and !s coersions + self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello') + self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello') + self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello ') + self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello ') + self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'") + self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'") + self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)') + + # test fallback to object.__format__ + self.assertEqual(u'{0}'.format({}), u'{}') + self.assertEqual(u'{0}'.format([]), u'[]') + self.assertEqual(u'{0}'.format([1]), u'[1]') + self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)') + self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data) ') + self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data) ') + self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)') + self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data') + self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data') + + self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007, + month=8, + day=27)), + "date: 2007-08-27") + + # test deriving from a builtin type and overriding __format__ + self.assertEqual("{0}".format(J(10)), "20") + + + # string format specifiers + self.assertEqual('{0:}'.format('a'), 'a') + + # computed format specifiers + self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello') + self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello') + self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello') + self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ') + self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ') + + # test various errors + self.assertRaises(ValueError, '{'.format) + self.assertRaises(ValueError, '}'.format) + self.assertRaises(ValueError, 'a{'.format) + self.assertRaises(ValueError, 'a}'.format) + self.assertRaises(ValueError, '{a'.format) + self.assertRaises(ValueError, '}a'.format) + self.assertRaises(IndexError, '{0}'.format) + self.assertRaises(IndexError, '{1}'.format, 'abc') + self.assertRaises(KeyError, '{x}'.format) + self.assertRaises(ValueError, "}{".format) + self.assertRaises(ValueError, "{".format) + self.assertRaises(ValueError, "}".format) + self.assertRaises(ValueError, "abc{0:{}".format) + self.assertRaises(ValueError, "{0".format) + self.assertRaises(IndexError, "{0.}".format) + self.assertRaises(ValueError, "{0.}".format, 0) + self.assertRaises(IndexError, "{0[}".format) + self.assertRaises(ValueError, "{0[}".format, []) + self.assertRaises(KeyError, "{0]}".format) + self.assertRaises(ValueError, "{0.[]}".format, 0) + self.assertRaises(ValueError, "{0..foo}".format, 0) + self.assertRaises(ValueError, "{0[0}".format, 0) + self.assertRaises(ValueError, "{0[0:foo}".format, 0) + self.assertRaises(KeyError, "{c]}".format) + self.assertRaises(ValueError, "{{ {{{0}}".format, 0) + self.assertRaises(ValueError, "{0}}".format, 0) + self.assertRaises(KeyError, "{foo}".format, bar=3) + self.assertRaises(ValueError, "{0!x}".format, 3) + self.assertRaises(ValueError, "{0!}".format, 0) + self.assertRaises(ValueError, "{0!rs}".format, 0) + self.assertRaises(ValueError, "{!}".format) + self.assertRaises(ValueError, "{:}".format) + self.assertRaises(ValueError, "{:s}".format) + self.assertRaises(ValueError, "{}".format) + + # can't have a replacement on the field name portion + self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4) + + # exceed maximum recursion depth + self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '') + self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format, + 0, 1, 2, 3, 4, 5, 6, 7) + + # string format spec errors + self.assertRaises(ValueError, "{0:-s}".format, '') + self.assertRaises(ValueError, format, "", "-") + self.assertRaises(ValueError, "{0:=s}".format, '') def test_main(): test_support.run_unittest(__name__) diff --git a/Makefile.pre.in b/Makefile.pre.in index ec7c0d8..3c42409 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -281,6 +281,8 @@ PYTHON_OBJS= \ Python/getopt.o \ Python/pystrcmp.o \ Python/pystrtod.o \ + Python/formatter_unicode.o \ + Python/formatter_string.o \ Python/$(DYNLOADFILE) \ $(LIBOBJS) \ $(MACHDEP_OBJS) \ @@ -515,6 +517,20 @@ Python/importdl.o: $(srcdir)/Python/importdl.c Objects/unicodectype.o: $(srcdir)/Objects/unicodectype.c \ $(srcdir)/Objects/unicodetype_db.h +Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \ + $(srcdir)/Objects/stringlib/string_format.h \ + $(srcdir)/Objects/stringlib/unicodedefs.h \ + $(srcdir)/Objects/stringlib/fastsearch.h \ + $(srcdir)/Objects/stringlib/count.h \ + $(srcdir)/Objects/stringlib/find.h \ + $(srcdir)/Objects/stringlib/partition.h + +Python/formatter_unicode.o: $(srcdir)/Python/formatter_unicode.c \ + $(srcdir)/Objects/stringlib/formatter.h + +Python/formatter_string.o: $(srcdir)/Python/formatter_string.c \ + $(srcdir)/Objects/stringlib/formatter.h + ############################################################################ # Header files diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 83e547e..fb11e35 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -2469,6 +2469,32 @@ date_strftime(PyDateTime_Date *self, PyObject *args, PyObject *kw) return result; } +static PyObject * +date_format(PyDateTime_Date *self, PyObject *args) +{ + PyObject *format; + + if (!PyArg_ParseTuple(args, "O:__format__", &format)) + return NULL; + + /* Check for str or unicode */ + if (PyString_Check(format)) { + /* If format is zero length, return str(self) */ + if (PyString_GET_SIZE(format) == 0) + return PyObject_Str((PyObject *)self); + } else if (PyUnicode_Check(format)) { + /* If format is zero length, return str(self) */ + if (PyUnicode_GET_SIZE(format) == 0) + return PyObject_Unicode((PyObject *)self); + } else { + PyErr_Format(PyExc_ValueError, + "__format__ expects str or unicode, not %.200s", + Py_TYPE(format)->tp_name); + return NULL; + } + return PyObject_CallMethod((PyObject *)self, "strftime", "O", format); +} + /* ISO methods. */ static PyObject * @@ -2633,6 +2659,9 @@ static PyMethodDef date_methods[] = { {"strftime", (PyCFunction)date_strftime, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("format -> strftime() style string.")}, + {"__format__", (PyCFunction)date_format, METH_VARARGS, + PyDoc_STR("Formats self with strftime.")}, + {"timetuple", (PyCFunction)date_timetuple, METH_NOARGS, PyDoc_STR("Return time tuple, compatible with time.localtime().")}, @@ -3418,6 +3447,9 @@ static PyMethodDef time_methods[] = { {"strftime", (PyCFunction)time_strftime, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("format -> strftime() style string.")}, + {"__format__", (PyCFunction)date_format, METH_VARARGS, + PyDoc_STR("Formats self with strftime.")}, + {"utcoffset", (PyCFunction)time_utcoffset, METH_NOARGS, PyDoc_STR("Return self.tzinfo.utcoffset(self).")}, diff --git a/Objects/abstract.c b/Objects/abstract.c index a377e76..071cbdc 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -348,6 +348,138 @@ int PyObject_AsWriteBuffer(PyObject *obj, return 0; } +PyObject * +PyObject_Format(PyObject* obj, PyObject *format_spec) +{ + static PyObject * str__format__ = NULL; + PyObject *empty = NULL; + PyObject *result = NULL; + int spec_is_unicode; + int result_is_unicode; + + /* Initialize cached value */ + if (str__format__ == NULL) { + /* Initialize static variable needed by _PyType_Lookup */ + str__format__ = PyString_InternFromString("__format__"); + if (str__format__ == NULL) + goto done; + } + + /* If no format_spec is provided, use an empty string */ + if (format_spec == NULL) { + empty = PyString_FromStringAndSize(NULL, 0); + format_spec = empty; + } + + /* Check the format_spec type, and make sure it's str or unicode */ + if (PyUnicode_Check(format_spec)) + spec_is_unicode = 1; + else if (PyString_Check(format_spec)) + spec_is_unicode = 0; + else { + PyErr_Format(PyExc_TypeError, + "format expects arg 2 to be string " + "or unicode, not %.100s", Py_TYPE(format_spec)->tp_name); + goto done; + } + + /* Make sure the type is initialized. float gets initialized late */ + if (Py_TYPE(obj)->tp_dict == NULL) + if (PyType_Ready(Py_TYPE(obj)) < 0) + goto done; + + /* Check for a __format__ method and call it. */ + if (PyInstance_Check(obj)) { + /* We're an instance of a classic class */ + PyObject *bound_method = PyObject_GetAttr(obj, + str__format__); + if (bound_method != NULL) { + result = PyObject_CallFunctionObjArgs(bound_method, + format_spec, + NULL); + Py_DECREF(bound_method); + } else { + PyObject *self_as_str; + PyObject *format_method; + + PyErr_Clear(); + /* Per the PEP, convert to str (or unicode, + depending on the type of the format + specifier). For new-style classes, this + logic is done by object.__format__(). */ + if (spec_is_unicode) + self_as_str = PyObject_Unicode(obj); + else + self_as_str = PyObject_Str(obj); + if (self_as_str == NULL) + goto done; + + /* Then call str.__format__ on that result */ + format_method = PyObject_GetAttr(self_as_str, + str__format__); + if (format_method == NULL) { + Py_DECREF(self_as_str); + goto done; + } + result = PyObject_CallFunctionObjArgs(format_method, + format_spec, + NULL); + Py_DECREF(self_as_str); + Py_DECREF(format_method); + if (result == NULL) + goto done; + } + } else { + /* Not an instance of a classic class, use the code + from py3k */ + + /* Find the (unbound!) __format__ method (a borrowed + reference) */ + PyObject *method = _PyType_Lookup(Py_TYPE(obj), + str__format__); + if (method == NULL) { + PyErr_Format(PyExc_TypeError, + "Type %.100s doesn't define __format__", + Py_TYPE(obj)->tp_name); + goto done; + } + /* And call it, binding it to the value */ + result = PyObject_CallFunctionObjArgs(method, obj, + format_spec, NULL); + } + + if (result == NULL) + goto done; + + /* Check the result type, and make sure it's str or unicode */ + if (PyUnicode_Check(result)) + result_is_unicode = 1; + else if (PyString_Check(result)) + result_is_unicode = 0; + else { + PyErr_Format(PyExc_TypeError, + "%.100s.__format__ must return string or " + "unicode, not %.100s", Py_TYPE(obj)->tp_name, + Py_TYPE(result)->tp_name); + Py_DECREF(result); + result = NULL; + goto done; + } + + /* Convert to unicode, if needed. Required if spec is unicode + and result is str */ + if (spec_is_unicode && !result_is_unicode) { + PyObject *tmp = PyObject_Unicode(result); + /* This logic works whether or not tmp is NULL */ + Py_DECREF(result); + result = tmp; + } + +done: + Py_XDECREF(empty); + return result; +} + /* Operations on numbers */ int diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 19149af..392a037 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -10,6 +10,7 @@ #include <ctype.h> #include <float.h> +#include "formatter_string.h" #if !defined(__STDC__) extern double fmod(double, double); @@ -1434,6 +1435,46 @@ float_getzero(PyObject *v, void *closure) return PyFloat_FromDouble(0.0); } +static PyObject * +float__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + return NULL; + if (PyString_Check(format_spec)) + return string_float__format__(self, args); + if (PyUnicode_Check(format_spec)) { + /* Convert format_spec to a str */ + PyObject *result = NULL; + PyObject *newargs = NULL; + PyObject *string_format_spec = NULL; + + string_format_spec = PyObject_Str(format_spec); + if (string_format_spec == NULL) + goto done; + + newargs = Py_BuildValue("(O)", string_format_spec); + if (newargs == NULL) + goto done; + + result = string_float__format__(self, newargs); + + done: + Py_XDECREF(string_format_spec); + Py_XDECREF(newargs); + return result; + } + PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); + return NULL; +} + +PyDoc_STRVAR(float__format__doc, +"float.__format__(format_spec) -> string\n" +"\n" +"Formats the float according to format_spec."); + + static PyMethodDef float_methods[] = { {"conjugate", (PyCFunction)float_float, METH_NOARGS, "Returns self, the complex conjugate of any float."}, @@ -1446,6 +1487,8 @@ static PyMethodDef float_methods[] = { METH_O|METH_CLASS, float_getformat_doc}, {"__setformat__", (PyCFunction)float_setformat, METH_VARARGS|METH_CLASS, float_setformat_doc}, + {"__format__", (PyCFunction)float__format__, + METH_VARARGS, float__format__doc}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/intobject.c b/Objects/intobject.c index 2fdea25..65c1c23 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include <ctype.h> +#include "formatter_string.h" static PyObject *int_int(PyIntObject *v); @@ -1108,12 +1109,47 @@ _PyInt_Format(PyIntObject *v, int base, int newstyle) return PyString_FromStringAndSize(p, &buf[sizeof(buf)] - p); } +static PyObject * +int__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + return NULL; + if (PyString_Check(format_spec)) + return string_int__format__(self, args); + if (PyUnicode_Check(format_spec)) { + /* Convert format_spec to a str */ + PyObject *result = NULL; + PyObject *newargs = NULL; + PyObject *string_format_spec = NULL; + + string_format_spec = PyObject_Str(format_spec); + if (string_format_spec == NULL) + goto done; + + newargs = Py_BuildValue("(O)", string_format_spec); + if (newargs == NULL) + goto done; + + result = string_int__format__(self, newargs); + + done: + Py_XDECREF(string_format_spec); + Py_XDECREF(newargs); + return result; + } + PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); + return NULL; +} + static PyMethodDef int_methods[] = { {"conjugate", (PyCFunction)int_int, METH_NOARGS, "Returns self, the complex conjugate of any int."}, {"__trunc__", (PyCFunction)int_int, METH_NOARGS, "Truncating an Integral returns itself."}, {"__getnewargs__", (PyCFunction)int_getnewargs, METH_NOARGS}, + {"__format__", (PyCFunction)int__format__, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 3ee2992..46ed713 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,6 +6,7 @@ #include "Python.h" #include "longintrepr.h" +#include "formatter_string.h" #include <ctype.h> @@ -3380,12 +3381,47 @@ long_getN(PyLongObject *v, void *context) { return PyLong_FromLong((intptr_t)context); } +static PyObject * +long__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + return NULL; + if (PyString_Check(format_spec)) + return string_long__format__(self, args); + if (PyUnicode_Check(format_spec)) { + /* Convert format_spec to a str */ + PyObject *result = NULL; + PyObject *newargs = NULL; + PyObject *string_format_spec = NULL; + + string_format_spec = PyObject_Str(format_spec); + if (string_format_spec == NULL) + goto done; + + newargs = Py_BuildValue("(O)", string_format_spec); + if (newargs == NULL) + goto done; + + result = string_long__format__(self, newargs); + + done: + Py_XDECREF(string_format_spec); + Py_XDECREF(newargs); + return result; + } + PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); + return NULL; +} + static PyMethodDef long_methods[] = { {"conjugate", (PyCFunction)long_long, METH_NOARGS, "Returns self, the complex conjugate of any long."}, {"__trunc__", (PyCFunction)long_long, METH_NOARGS, "Truncating an Integral returns itself."}, {"__getnewargs__", (PyCFunction)long_getnewargs, METH_NOARGS}, + {"__format__", (PyCFunction)long__format__, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h new file mode 100644 index 0000000..39da6b3 --- /dev/null +++ b/Objects/stringlib/formatter.h @@ -0,0 +1,980 @@ +/* implements the string, long, and float formatters. that is, + string.__format__, etc. */ + +/* Before including this, you must include either: + stringlib/unicodedefs.h + stringlib/stringdefs.h + + Also, you should define the names: + FORMAT_STRING + FORMAT_LONG + FORMAT_FLOAT + to be whatever you want the public names of these functions to + be. These are the only non-static functions defined here. +*/ + +#define ALLOW_PARENS_FOR_SIGN 0 + +/* + get_integer consumes 0 or more decimal digit characters from an + input string, updates *result with the corresponding positive + integer, and returns the number of digits consumed. + + returns -1 on error. +*/ +static int +get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, + Py_ssize_t *result) +{ + Py_ssize_t accumulator, digitval, oldaccumulator; + int numdigits; + accumulator = numdigits = 0; + for (;;(*ptr)++, numdigits++) { + if (*ptr >= end) + break; + digitval = STRINGLIB_TODECIMAL(**ptr); + if (digitval < 0) + break; + /* + This trick was copied from old Unicode format code. It's cute, + but would really suck on an old machine with a slow divide + implementation. Fortunately, in the normal case we do not + expect too many digits. + */ + oldaccumulator = accumulator; + accumulator *= 10; + if ((accumulator+10)/10 != oldaccumulator+1) { + PyErr_Format(PyExc_ValueError, + "Too many decimal digits in format string"); + return -1; + } + accumulator += digitval; + } + *result = accumulator; + return numdigits; +} + +/************************************************************************/ +/*********** standard format specifier parsing **************************/ +/************************************************************************/ + +/* returns true if this character is a specifier alignment token */ +Py_LOCAL_INLINE(int) +is_alignment_token(STRINGLIB_CHAR c) +{ + switch (c) { + case '<': case '>': case '=': case '^': + return 1; + default: + return 0; + } +} + +/* returns true if this character is a sign element */ +Py_LOCAL_INLINE(int) +is_sign_element(STRINGLIB_CHAR c) +{ + switch (c) { + case ' ': case '+': case '-': +#if ALLOW_PARENS_FOR_SIGN + case '(': +#endif + return 1; + default: + return 0; + } +} + + +typedef struct { + STRINGLIB_CHAR fill_char; + STRINGLIB_CHAR align; + STRINGLIB_CHAR sign; + Py_ssize_t width; + Py_ssize_t precision; + STRINGLIB_CHAR type; +} InternalFormatSpec; + +/* + ptr points to the start of the format_spec, end points just past its end. + fills in format with the parsed information. + returns 1 on success, 0 on failure. + if failure, sets the exception +*/ +static int +parse_internal_render_format_spec(PyObject *format_spec, + InternalFormatSpec *format, + char default_type) +{ + STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec); + STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec); + + /* end-ptr is used throughout this code to specify the length of + the input string */ + + Py_ssize_t specified_width; + + format->fill_char = '\0'; + format->align = '\0'; + format->sign = '\0'; + format->width = -1; + format->precision = -1; + format->type = default_type; + + /* If the second char is an alignment token, + then parse the fill char */ + if (end-ptr >= 2 && is_alignment_token(ptr[1])) { + format->align = ptr[1]; + format->fill_char = ptr[0]; + ptr += 2; + } + else if (end-ptr >= 1 && is_alignment_token(ptr[0])) { + format->align = ptr[0]; + ptr++; + } + + /* Parse the various sign options */ + if (end-ptr >= 1 && is_sign_element(ptr[0])) { + format->sign = ptr[0]; + ptr++; +#if ALLOW_PARENS_FOR_SIGN + if (end-ptr >= 1 && ptr[0] == ')') { + ptr++; + } +#endif + } + + /* The special case for 0-padding (backwards compat) */ + if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') { + format->fill_char = '0'; + if (format->align == '\0') { + format->align = '='; + } + ptr++; + } + + /* XXX add error checking */ + specified_width = get_integer(&ptr, end, &format->width); + + /* if specified_width is 0, we didn't consume any characters for + the width. in that case, reset the width to -1, because + get_integer() will have set it to zero */ + if (specified_width == 0) { + format->width = -1; + } + + /* Parse field precision */ + if (end-ptr && ptr[0] == '.') { + ptr++; + + /* XXX add error checking */ + specified_width = get_integer(&ptr, end, &format->precision); + + /* not having a precision after a dot is an error */ + if (specified_width == 0) { + PyErr_Format(PyExc_ValueError, + "Format specifier missing precision"); + return 0; + } + + } + + /* Finally, parse the type field */ + + if (end-ptr > 1) { + /* invalid conversion spec */ + PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); + return 0; + } + + if (end-ptr == 1) { + format->type = ptr[0]; + ptr++; + } + + return 1; +} + +#if defined FORMAT_FLOAT || defined FORMAT_LONG +/************************************************************************/ +/*********** common routines for numeric formatting *********************/ +/************************************************************************/ + +/* describes the layout for an integer, see the comment in + _calc_integer_widths() for details */ +typedef struct { + Py_ssize_t n_lpadding; + Py_ssize_t n_spadding; + Py_ssize_t n_rpadding; + char lsign; + Py_ssize_t n_lsign; + char rsign; + Py_ssize_t n_rsign; + Py_ssize_t n_total; /* just a convenience, it's derivable from the + other fields */ +} NumberFieldWidths; + +/* not all fields of format are used. for example, precision is + unused. should this take discrete params in order to be more clear + about what it does? or is passing a single format parameter easier + and more efficient enough to justify a little obfuscation? */ +static void +calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, + Py_ssize_t n_digits, const InternalFormatSpec *format) +{ + r->n_lpadding = 0; + r->n_spadding = 0; + r->n_rpadding = 0; + r->lsign = '\0'; + r->n_lsign = 0; + r->rsign = '\0'; + r->n_rsign = 0; + + /* the output will look like: + | | + | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> | + | | + + lsign and rsign are computed from format->sign and the actual + sign of the number + + digits is already known + + the total width is either given, or computed from the + actual digits + + only one of lpadding, spadding, and rpadding can be non-zero, + and it's calculated from the width and other fields + */ + + /* compute the various parts we're going to write */ + if (format->sign == '+') { + /* always put a + or - */ + r->n_lsign = 1; + r->lsign = (actual_sign == '-' ? '-' : '+'); + } +#if ALLOW_PARENS_FOR_SIGN + else if (format->sign == '(') { + if (actual_sign == '-') { + r->n_lsign = 1; + r->lsign = '('; + r->n_rsign = 1; + r->rsign = ')'; + } + } +#endif + else if (format->sign == ' ') { + r->n_lsign = 1; + r->lsign = (actual_sign == '-' ? '-' : ' '); + } + else { + /* non specified, or the default (-) */ + if (actual_sign == '-') { + r->n_lsign = 1; + r->lsign = '-'; + } + } + + /* now the number of padding characters */ + if (format->width == -1) { + /* no padding at all, nothing to do */ + } + else { + /* see if any padding is needed */ + if (r->n_lsign + n_digits + r->n_rsign >= format->width) { + /* no padding needed, we're already bigger than the + requested width */ + } + else { + /* determine which of left, space, or right padding is + needed */ + Py_ssize_t padding = format->width - + (r->n_lsign + n_digits + r->n_rsign); + if (format->align == '<') + r->n_rpadding = padding; + else if (format->align == '>') + r->n_lpadding = padding; + else if (format->align == '^') { + r->n_lpadding = padding / 2; + r->n_rpadding = padding - r->n_lpadding; + } + else if (format->align == '=') + r->n_spadding = padding; + else + r->n_lpadding = padding; + } + } + r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding + + n_digits + r->n_rsign + r->n_rpadding; +} + +/* fill in the non-digit parts of a numbers's string representation, + as determined in _calc_integer_widths(). returns the pointer to + where the digits go. */ +static STRINGLIB_CHAR * +fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, + Py_ssize_t n_digits, STRINGLIB_CHAR fill_char) +{ + STRINGLIB_CHAR* p_digits; + + if (spec->n_lpadding) { + STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); + p_buf += spec->n_lpadding; + } + if (spec->n_lsign == 1) { + *p_buf++ = spec->lsign; + } + if (spec->n_spadding) { + STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); + p_buf += spec->n_spadding; + } + p_digits = p_buf; + p_buf += n_digits; + if (spec->n_rsign == 1) { + *p_buf++ = spec->rsign; + } + if (spec->n_rpadding) { + STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding); + p_buf += spec->n_rpadding; + } + return p_digits; +} +#endif /* FORMAT_FLOAT || FORMAT_LONG */ + +/************************************************************************/ +/*********** string formatting ******************************************/ +/************************************************************************/ + +static PyObject * +format_string_internal(PyObject *value, const InternalFormatSpec *format) +{ + Py_ssize_t width; /* total field width */ + Py_ssize_t lpad; + STRINGLIB_CHAR *dst; + STRINGLIB_CHAR *src = STRINGLIB_STR(value); + Py_ssize_t len = STRINGLIB_LEN(value); + PyObject *result = NULL; + + /* sign is not allowed on strings */ + if (format->sign != '\0') { + PyErr_SetString(PyExc_ValueError, + "Sign not allowed in string format specifier"); + goto done; + } + + /* '=' alignment not allowed on strings */ + if (format->align == '=') { + PyErr_SetString(PyExc_ValueError, + "'=' alignment not allowed " + "in string format specifier"); + goto done; + } + + /* if precision is specified, output no more that format.precision + characters */ + if (format->precision >= 0 && len >= format->precision) { + len = format->precision; + } + + if (format->width >= 0) { + width = format->width; + + /* but use at least len characters */ + if (len > width) { + width = len; + } + } + else { + /* not specified, use all of the chars and no more */ + width = len; + } + + /* allocate the resulting string */ + result = STRINGLIB_NEW(NULL, width); + if (result == NULL) + goto done; + + /* now write into that space */ + dst = STRINGLIB_STR(result); + + /* figure out how much leading space we need, based on the + aligning */ + if (format->align == '>') + lpad = width - len; + else if (format->align == '^') + lpad = (width - len) / 2; + else + lpad = 0; + + /* if right aligning, increment the destination allow space on the + left */ + memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR)); + + /* do any padding */ + if (width > len) { + STRINGLIB_CHAR fill_char = format->fill_char; + if (fill_char == '\0') { + /* use the default, if not specified */ + fill_char = ' '; + } + + /* pad on left */ + if (lpad) + STRINGLIB_FILL(dst, fill_char, lpad); + + /* pad on right */ + if (width - len - lpad) + STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad); + } + +done: + return result; +} + + +/************************************************************************/ +/*********** long formatting ********************************************/ +/************************************************************************/ + +#if defined FORMAT_LONG || defined FORMAT_INT +typedef PyObject* +(*IntOrLongToString)(PyObject *value, int base); + +static PyObject * +format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, + IntOrLongToString tostring) +{ + PyObject *result = NULL; + PyObject *tmp = NULL; + STRINGLIB_CHAR *pnumeric_chars; + STRINGLIB_CHAR numeric_char; + STRINGLIB_CHAR sign = '\0'; + STRINGLIB_CHAR *p; + Py_ssize_t n_digits; /* count of digits need from the computed + string */ + Py_ssize_t n_leading_chars; + NumberFieldWidths spec; + long x; + + /* no precision allowed on integers */ + if (format->precision != -1) { + PyErr_SetString(PyExc_ValueError, + "Precision not allowed in integer format specifier"); + goto done; + } + + + /* special case for character formatting */ + if (format->type == 'c') { + /* error to specify a sign */ + if (format->sign != '\0') { + PyErr_SetString(PyExc_ValueError, + "Sign not allowed with integer" + " format specifier 'c'"); + goto done; + } + + /* taken from unicodeobject.c formatchar() */ + /* Integer input truncated to a character */ +/* XXX: won't work for int */ + x = PyLong_AsLong(value); + if (x == -1 && PyErr_Occurred()) + goto done; +#ifdef Py_UNICODE_WIDE + if (x < 0 || x > 0x10ffff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x110000) " + "(wide Python build)"); + goto done; + } +#else + if (x < 0 || x > 0xffff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x10000) " + "(narrow Python build)"); + goto done; + } +#endif + numeric_char = (STRINGLIB_CHAR)x; + pnumeric_chars = &numeric_char; + n_digits = 1; + } + else { + int base; + int leading_chars_to_skip; /* Number of characters added by + PyNumber_ToBase that we want to + skip over. */ + + /* Compute the base and how many characters will be added by + PyNumber_ToBase */ + switch (format->type) { + case 'b': + base = 2; + leading_chars_to_skip = 2; /* 0b */ + break; + case 'o': + base = 8; + leading_chars_to_skip = 2; /* 0o */ + break; + case 'x': + case 'X': + base = 16; + leading_chars_to_skip = 2; /* 0x */ + break; + default: /* shouldn't be needed, but stops a compiler warning */ + case 'd': + base = 10; + leading_chars_to_skip = 0; + break; + } + + /* Do the hard part, converting to a string in a given base */ + tmp = tostring(value, base); + if (tmp == NULL) + goto done; + + pnumeric_chars = STRINGLIB_STR(tmp); + n_digits = STRINGLIB_LEN(tmp); + + /* Remember not to modify what pnumeric_chars points to. it + might be interned. Only modify it after we copy it into a + newly allocated output buffer. */ + + /* Is a sign character present in the output? If so, remember it + and skip it */ + sign = pnumeric_chars[0]; + if (sign == '-') { + ++leading_chars_to_skip; + } + + /* Skip over the leading chars (0x, 0b, etc.) */ + n_digits -= leading_chars_to_skip; + pnumeric_chars += leading_chars_to_skip; + } + + /* Calculate the widths of the various leading and trailing parts */ + calc_number_widths(&spec, sign, n_digits, format); + + /* Allocate a new string to hold the result */ + result = STRINGLIB_NEW(NULL, spec.n_total); + if (!result) + goto done; + p = STRINGLIB_STR(result); + + /* Fill in the digit parts */ + n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding; + memmove(p + n_leading_chars, + pnumeric_chars, + n_digits * sizeof(STRINGLIB_CHAR)); + + /* if X, convert to uppercase */ + if (format->type == 'X') { + Py_ssize_t t; + for (t = 0; t < n_digits; t++) + p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]); + } + + /* Fill in the non-digit parts */ + fill_number(p, &spec, n_digits, + format->fill_char == '\0' ? ' ' : format->fill_char); + +done: + Py_XDECREF(tmp); + return result; +} +#endif /* defined FORMAT_LONG || defined FORMAT_INT */ + +/************************************************************************/ +/*********** float formatting *******************************************/ +/************************************************************************/ + +#ifdef FORMAT_FLOAT +#if STRINGLIB_IS_UNICODE +/* taken from unicodeobject.c */ +static Py_ssize_t +strtounicode(Py_UNICODE *buffer, const char *charbuffer) +{ + register Py_ssize_t i; + Py_ssize_t len = strlen(charbuffer); + for (i = len - 1; i >= 0; i--) + buffer[i] = (Py_UNICODE) charbuffer[i]; + + return len; +} +#endif + +/* the callback function to call to do the actual float formatting. + it matches the definition of PyOS_ascii_formatd */ +typedef char* +(*DoubleSnprintfFunction)(char *buffer, size_t buf_len, + const char *format, double d); + +/* just a wrapper to make PyOS_snprintf look like DoubleSnprintfFunction */ +static char* +snprintf_double(char *buffer, size_t buf_len, const char *format, double d) +{ + PyOS_snprintf(buffer, buf_len, format, d); + return NULL; +} + +/* see FORMATBUFLEN in unicodeobject.c */ +#define FLOAT_FORMATBUFLEN 120 + +/* much of this is taken from unicodeobject.c */ +/* use type instead of format->type, so that it can be overridden by + format_number() */ +static PyObject * +_format_float(STRINGLIB_CHAR type, PyObject *value, + const InternalFormatSpec *format, + DoubleSnprintfFunction snprintf) +{ + /* fmt = '%.' + `prec` + `type` + '%%' + worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/ + char fmt[20]; + + /* taken from unicodeobject.c */ + /* Worst case length calc to ensure no buffer overrun: + + 'g' formats: + fmt = %#.<prec>g + buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp + for any double rep.) + len = 1 + prec + 1 + 2 + 5 = 9 + prec + + 'f' formats: + buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) + len = 1 + 50 + 1 + prec = 52 + prec + + If prec=0 the effective precision is 1 (the leading digit is + always given), therefore increase the length by one. + + */ + char charbuf[FLOAT_FORMATBUFLEN]; + Py_ssize_t n_digits; + double x; + Py_ssize_t precision = format->precision; + PyObject *result = NULL; + STRINGLIB_CHAR sign; + char* trailing = ""; + STRINGLIB_CHAR *p; + NumberFieldWidths spec; + +#if STRINGLIB_IS_UNICODE + Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN]; +#endif + + /* first, do the conversion as 8-bit chars, using the platform's + snprintf. then, if needed, convert to unicode. */ + + /* 'F' is the same as 'f', per the PEP */ + if (type == 'F') + type = 'f'; + + x = PyFloat_AsDouble(value); + + if (x == -1.0 && PyErr_Occurred()) + goto done; + + if (type == '%') { + type = 'f'; + x *= 100; + trailing = "%"; + } + + if (precision < 0) + precision = 6; + if (type == 'f' && (fabs(x) / 1e25) >= 1e25) + type = 'g'; + + /* cast "type", because if we're in unicode we need to pass a + 8-bit char. this is safe, because we've restricted what "type" + can be */ + PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision, + (char)type); + + /* call the passed in function to do the actual formatting */ + snprintf(charbuf, sizeof(charbuf), fmt, x); + + /* adding trailing to fmt with PyOS_snprintf doesn't work, not + sure why. we'll just concatentate it here, no harm done. we + know we can't have a buffer overflow from the fmt size + analysis */ + strcat(charbuf, trailing); + + /* rather than duplicate the code for snprintf for both unicode + and 8 bit strings, we just use the 8 bit version and then + convert to unicode in a separate code path. that's probably + the lesser of 2 evils. */ +#if STRINGLIB_IS_UNICODE + n_digits = strtounicode(unicodebuf, charbuf); + p = unicodebuf; +#else + /* compute the length. I believe this is done because the return + value from snprintf above is unreliable */ + n_digits = strlen(charbuf); + p = charbuf; +#endif + + /* is a sign character present in the output? if so, remember it + and skip it */ + sign = p[0]; + if (sign == '-') { + p++; + n_digits--; + } + + calc_number_widths(&spec, sign, n_digits, format); + + /* allocate a string with enough space */ + result = STRINGLIB_NEW(NULL, spec.n_total); + if (result == NULL) + goto done; + + /* fill in the non-digit parts */ + fill_number(STRINGLIB_STR(result), &spec, n_digits, + format->fill_char == '\0' ? ' ' : format->fill_char); + + /* fill in the digit parts */ + memmove(STRINGLIB_STR(result) + + (spec.n_lpadding + spec.n_lsign + spec.n_spadding), + p, + n_digits * sizeof(STRINGLIB_CHAR)); + +done: + return result; +} + +static PyObject * +format_float_internal(PyObject *value, const InternalFormatSpec *format) +{ + if (format->type == 'n') + return _format_float('f', value, format, snprintf_double); + else + return _format_float(format->type, value, format, PyOS_ascii_formatd); +} +#endif /* FORMAT_FLOAT */ + +/************************************************************************/ +/*********** built in formatters ****************************************/ +/************************************************************************/ +#ifdef FORMAT_STRING +PyObject * +FORMAT_STRING(PyObject* value, PyObject* args) +{ + PyObject *format_spec; + PyObject *result = NULL; +#if PY_VERSION_HEX < 0x03000000 + PyObject *tmp = NULL; +#endif + InternalFormatSpec format; + + /* If 2.x, we accept either str or unicode, and try to convert it + to the right type. In 3.x, we insist on only unicode */ +#if PY_VERSION_HEX >= 0x03000000 + if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", + &format_spec)) + goto done; +#else + /* If 2.x, convert format_spec to the same type as value */ + /* This is to allow things like u''.format('') */ + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + goto done; + if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { + PyErr_Format(PyExc_TypeError, "__format__ arg must be str " + "or unicode, not %s", Py_TYPE(format_spec)->tp_name); + goto done; + } + tmp = STRINGLIB_TOSTR(format_spec); + if (tmp == NULL) + goto done; + format_spec = tmp; +#endif + + /* check for the special case of zero length format spec, make + it equivalent to str(value) */ + if (STRINGLIB_LEN(format_spec) == 0) { + result = STRINGLIB_TOSTR(value); + goto done; + } + + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, &format, 's')) + goto done; + + /* type conversion? */ + switch (format.type) { + case 's': + /* no type conversion needed, already a string. do the formatting */ + result = format_string_internal(value, &format); + break; + default: + /* unknown */ + PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", + format.type); + goto done; + } + +done: +#if PY_VERSION_HEX < 0x03000000 + Py_XDECREF(tmp); +#endif + return result; +} +#endif /* FORMAT_STRING */ + +#if defined FORMAT_LONG || defined FORMAT_INT +static PyObject* +format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) +{ + PyObject *format_spec; + PyObject *result = NULL; + PyObject *tmp = NULL; + InternalFormatSpec format; + + if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", + &format_spec)) + goto done; + + /* check for the special case of zero length format spec, make + it equivalent to str(value) */ + if (STRINGLIB_LEN(format_spec) == 0) { + result = STRINGLIB_TOSTR(value); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, &format, 'd')) + goto done; + + /* type conversion? */ + switch (format.type) { + case 'b': + case 'c': + case 'd': + case 'o': + case 'x': + case 'X': + /* no type conversion needed, already an int (or long). do + the formatting */ + result = format_int_or_long_internal(value, &format, tostring); + break; + + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'n': + case '%': + /* convert to float */ + tmp = PyNumber_Float(value); + if (tmp == NULL) + goto done; + result = format_float_internal(value, &format); + break; + + default: + /* unknown */ + PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", + format.type); + goto done; + } + +done: + Py_XDECREF(tmp); + return result; +} +#endif /* FORMAT_LONG || defined FORMAT_INT */ + +#ifdef FORMAT_LONG +/* Need to define long_format as a function that will convert a long + to a string. In 3.0, _PyLong_Format has the correct signature. In + 2.x, we need to fudge a few parameters */ +#if PY_VERSION_HEX >= 0x03000000 +#define long_format _PyLong_Format +#else +static PyObject* +long_format(PyObject* value, int base) +{ + /* Convert to base, don't add trailing 'L', and use the new octal + format. We already know this is a long object */ + assert(PyLong_Check(value)); + /* convert to base, don't add 'L', and use the new octal format */ + return _PyLong_Format(value, base, 0, 1); +} +#endif + +PyObject * +FORMAT_LONG(PyObject* value, PyObject* args) +{ + return format_int_or_long(value, args, long_format); +} +#endif /* FORMAT_LONG */ + +#ifdef FORMAT_INT +/* this is only used for 2.x, not 3.0 */ +static PyObject* +int_format(PyObject* value, int base) +{ + /* Convert to base, and use the new octal format. We already + know this is an int object */ + assert(PyInt_Check(value)); + return _PyInt_Format((PyIntObject*)value, base, 1); +} + +PyObject * +FORMAT_INT(PyObject* value, PyObject* args) +{ + return format_int_or_long(value, args, int_format); +} +#endif /* FORMAT_INT */ + +#ifdef FORMAT_FLOAT +PyObject * +FORMAT_FLOAT(PyObject *value, PyObject *args) +{ + PyObject *format_spec; + PyObject *result = NULL; + InternalFormatSpec format; + + if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec)) + goto done; + + /* check for the special case of zero length format spec, make + it equivalent to str(value) */ + if (STRINGLIB_LEN(format_spec) == 0) { + result = STRINGLIB_TOSTR(value); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, &format, 'g')) + goto done; + + /* type conversion? */ + switch (format.type) { + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'n': + case '%': + /* no conversion, already a float. do the formatting */ + result = format_float_internal(value, &format); + break; + + default: + /* unknown */ + PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", + format.type); + goto done; + } + +done: + return result; +} +#endif /* FORMAT_FLOAT */ diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h new file mode 100644 index 0000000..70f8f13 --- /dev/null +++ b/Objects/stringlib/string_format.h @@ -0,0 +1,1214 @@ +/* + string_format.h -- implementation of string.format(). + + It uses the Objects/stringlib conventions, so that it can be + compiled for both unicode and string objects. +*/ + + +/* Defines for Python 2.6 compatability */ +#if PY_VERSION_HEX < 0x03000000 +#define PyLong_FromSsize_t _PyLong_FromSsize_t +#endif + +/* Defines for more efficiently reallocating the string buffer */ +#define INITIAL_SIZE_INCREMENT 100 +#define SIZE_MULTIPLIER 2 +#define MAX_SIZE_INCREMENT 3200 + + +/************************************************************************/ +/*********** Global data structures and forward declarations *********/ +/************************************************************************/ + +/* + A SubString consists of the characters between two string or + unicode pointers. +*/ +typedef struct { + STRINGLIB_CHAR *ptr; + STRINGLIB_CHAR *end; +} SubString; + + +/* forward declaration for recursion */ +static PyObject * +build_string(SubString *input, PyObject *args, PyObject *kwargs, + int recursion_depth); + + + +/************************************************************************/ +/************************** Utility functions ************************/ +/************************************************************************/ + +/* fill in a SubString from a pointer and length */ +Py_LOCAL_INLINE(void) +SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) +{ + str->ptr = p; + if (p == NULL) + str->end = NULL; + else + str->end = str->ptr + len; +} + +/* return a new string. if str->ptr is NULL, return None */ +Py_LOCAL_INLINE(PyObject *) +SubString_new_object(SubString *str) +{ + if (str->ptr == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return STRINGLIB_NEW(str->ptr, str->end - str->ptr); +} + +/* return a new string. if str->ptr is NULL, return None */ +Py_LOCAL_INLINE(PyObject *) +SubString_new_object_or_empty(SubString *str) +{ + if (str->ptr == NULL) { + return STRINGLIB_NEW(NULL, 0); + } + return STRINGLIB_NEW(str->ptr, str->end - str->ptr); +} + +/************************************************************************/ +/*********** Output string management functions ****************/ +/************************************************************************/ + +typedef struct { + STRINGLIB_CHAR *ptr; + STRINGLIB_CHAR *end; + PyObject *obj; + Py_ssize_t size_increment; +} OutputString; + +/* initialize an OutputString object, reserving size characters */ +static int +output_initialize(OutputString *output, Py_ssize_t size) +{ + output->obj = STRINGLIB_NEW(NULL, size); + if (output->obj == NULL) + return 0; + + output->ptr = STRINGLIB_STR(output->obj); + output->end = STRINGLIB_LEN(output->obj) + output->ptr; + output->size_increment = INITIAL_SIZE_INCREMENT; + + return 1; +} + +/* + output_extend reallocates the output string buffer. + It returns a status: 0 for a failed reallocation, + 1 for success. +*/ + +static int +output_extend(OutputString *output, Py_ssize_t count) +{ + STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); + Py_ssize_t curlen = output->ptr - startptr; + Py_ssize_t maxlen = curlen + count + output->size_increment; + + if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) + return 0; + startptr = STRINGLIB_STR(output->obj); + output->ptr = startptr + curlen; + output->end = startptr + maxlen; + if (output->size_increment < MAX_SIZE_INCREMENT) + output->size_increment *= SIZE_MULTIPLIER; + return 1; +} + +/* + output_data dumps characters into our output string + buffer. + + In some cases, it has to reallocate the string. + + It returns a status: 0 for a failed reallocation, + 1 for success. +*/ +static int +output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) +{ + if ((count > output->end - output->ptr) && !output_extend(output, count)) + return 0; + memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); + output->ptr += count; + return 1; +} + +/************************************************************************/ +/*********** Format string parsing -- integers and identifiers *********/ +/************************************************************************/ + +static Py_ssize_t +get_integer(const SubString *str) +{ + Py_ssize_t accumulator = 0; + Py_ssize_t digitval; + Py_ssize_t oldaccumulator; + STRINGLIB_CHAR *p; + + /* empty string is an error */ + if (str->ptr >= str->end) + return -1; + + for (p = str->ptr; p < str->end; p++) { + digitval = STRINGLIB_TODECIMAL(*p); + if (digitval < 0) + return -1; + /* + This trick was copied from old Unicode format code. It's cute, + but would really suck on an old machine with a slow divide + implementation. Fortunately, in the normal case we do not + expect too many digits. + */ + oldaccumulator = accumulator; + accumulator *= 10; + if ((accumulator+10)/10 != oldaccumulator+1) { + PyErr_Format(PyExc_ValueError, + "Too many decimal digits in format string"); + return -1; + } + accumulator += digitval; + } + return accumulator; +} + +/************************************************************************/ +/******** Functions to get field objects and specification strings ******/ +/************************************************************************/ + +/* do the equivalent of obj.name */ +static PyObject * +getattr(PyObject *obj, SubString *name) +{ + PyObject *newobj; + PyObject *str = SubString_new_object(name); + if (str == NULL) + return NULL; + newobj = PyObject_GetAttr(obj, str); + Py_DECREF(str); + return newobj; +} + +/* do the equivalent of obj[idx], where obj is a sequence */ +static PyObject * +getitem_sequence(PyObject *obj, Py_ssize_t idx) +{ + return PySequence_GetItem(obj, idx); +} + +/* do the equivalent of obj[idx], where obj is not a sequence */ +static PyObject * +getitem_idx(PyObject *obj, Py_ssize_t idx) +{ + PyObject *newobj; + PyObject *idx_obj = PyLong_FromSsize_t(idx); + if (idx_obj == NULL) + return NULL; + newobj = PyObject_GetItem(obj, idx_obj); + Py_DECREF(idx_obj); + return newobj; +} + +/* do the equivalent of obj[name] */ +static PyObject * +getitem_str(PyObject *obj, SubString *name) +{ + PyObject *newobj; + PyObject *str = SubString_new_object(name); + if (str == NULL) + return NULL; + newobj = PyObject_GetItem(obj, str); + Py_DECREF(str); + return newobj; +} + +typedef struct { + /* the entire string we're parsing. we assume that someone else + is managing its lifetime, and that it will exist for the + lifetime of the iterator. can be empty */ + SubString str; + + /* pointer to where we are inside field_name */ + STRINGLIB_CHAR *ptr; +} FieldNameIterator; + + +static int +FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, + Py_ssize_t len) +{ + SubString_init(&self->str, ptr, len); + self->ptr = self->str.ptr; + return 1; +} + +static int +_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) +{ + STRINGLIB_CHAR c; + + name->ptr = self->ptr; + + /* return everything until '.' or '[' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case '[': + case '.': + /* backup so that we this character will be seen next time */ + self->ptr--; + break; + default: + continue; + } + break; + } + /* end of string is okay */ + name->end = self->ptr; + return 1; +} + +static int +_FieldNameIterator_item(FieldNameIterator *self, SubString *name) +{ + int bracket_seen = 0; + STRINGLIB_CHAR c; + + name->ptr = self->ptr; + + /* return everything until ']' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case ']': + bracket_seen = 1; + break; + default: + continue; + } + break; + } + /* make sure we ended with a ']' */ + if (!bracket_seen) { + PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); + return 0; + } + + /* end of string is okay */ + /* don't include the ']' */ + name->end = self->ptr-1; + return 1; +} + +/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ +static int +FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, + Py_ssize_t *name_idx, SubString *name) +{ + /* check at end of input */ + if (self->ptr >= self->str.end) + return 1; + + switch (*self->ptr++) { + case '.': + *is_attribute = 1; + if (_FieldNameIterator_attr(self, name) == 0) + return 0; + *name_idx = -1; + break; + case '[': + *is_attribute = 0; + if (_FieldNameIterator_item(self, name) == 0) + return 0; + *name_idx = get_integer(name); + break; + default: + /* interal error, can't get here */ + assert(0); + return 0; + } + + /* empty string is an error */ + if (name->ptr == name->end) { + PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); + return 0; + } + + return 2; +} + + +/* input: field_name + output: 'first' points to the part before the first '[' or '.' + 'first_idx' is -1 if 'first' is not an integer, otherwise + it's the value of first converted to an integer + 'rest' is an iterator to return the rest +*/ +static int +field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, + Py_ssize_t *first_idx, FieldNameIterator *rest) +{ + STRINGLIB_CHAR c; + STRINGLIB_CHAR *p = ptr; + STRINGLIB_CHAR *end = ptr + len; + + /* find the part up until the first '.' or '[' */ + while (p < end) { + switch (c = *p++) { + case '[': + case '.': + /* backup so that we this character is available to the + "rest" iterator */ + p--; + break; + default: + continue; + } + break; + } + + /* set up the return values */ + SubString_init(first, ptr, p - ptr); + FieldNameIterator_init(rest, p, end - p); + + /* see if "first" is an integer, in which case it's used as an index */ + *first_idx = get_integer(first); + + /* zero length string is an error */ + if (first->ptr >= first->end) { + PyErr_SetString(PyExc_ValueError, "empty field name"); + goto error; + } + + return 1; +error: + return 0; +} + + +/* + get_field_object returns the object inside {}, before the + format_spec. It handles getindex and getattr lookups and consumes + the entire input string. +*/ +static PyObject * +get_field_object(SubString *input, PyObject *args, PyObject *kwargs) +{ + PyObject *obj = NULL; + int ok; + int is_attribute; + SubString name; + SubString first; + Py_ssize_t index; + FieldNameIterator rest; + + if (!field_name_split(input->ptr, input->end - input->ptr, &first, + &index, &rest)) { + goto error; + } + + if (index == -1) { + /* look up in kwargs */ + PyObject *key = SubString_new_object(&first); + if (key == NULL) + goto error; + if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { + PyErr_SetObject(PyExc_KeyError, key); + Py_DECREF(key); + goto error; + } + Py_DECREF(key); + Py_INCREF(obj); + } + else { + /* look up in args */ + obj = PySequence_GetItem(args, index); + if (obj == NULL) + goto error; + } + + /* iterate over the rest of the field_name */ + while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, + &name)) == 2) { + PyObject *tmp; + + if (is_attribute) + /* getattr lookup "." */ + tmp = getattr(obj, &name); + else + /* getitem lookup "[]" */ + if (index == -1) + tmp = getitem_str(obj, &name); + else + if (PySequence_Check(obj)) + tmp = getitem_sequence(obj, index); + else + /* not a sequence */ + tmp = getitem_idx(obj, index); + if (tmp == NULL) + goto error; + + /* assign to obj */ + Py_DECREF(obj); + obj = tmp; + } + /* end of iterator, this is the non-error case */ + if (ok == 1) + return obj; +error: + Py_XDECREF(obj); + return NULL; +} + +/************************************************************************/ +/***************** Field rendering functions **************************/ +/************************************************************************/ + +/* + render_field() is the main function in this section. It takes the + field object and field specification string generated by + get_field_and_spec, and renders the field into the output string. + + render_field calls fieldobj.__format__(format_spec) method, and + appends to the output. +*/ +static int +render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) +{ + int ok = 0; + PyObject *result = NULL; + + /* we need to create an object out of the pointers we have */ + PyObject *format_spec_object = SubString_new_object_or_empty(format_spec); + if (format_spec_object == NULL) + goto done; + + result = PyObject_Format(fieldobj, format_spec_object); + if (result == NULL) + goto done; + + ok = output_data(output, + STRINGLIB_STR(result), STRINGLIB_LEN(result)); +done: + Py_DECREF(format_spec_object); + Py_XDECREF(result); + return ok; +} + +static int +parse_field(SubString *str, SubString *field_name, SubString *format_spec, + STRINGLIB_CHAR *conversion) +{ + STRINGLIB_CHAR c = 0; + + /* initialize these, as they may be empty */ + *conversion = '\0'; + SubString_init(format_spec, NULL, 0); + + /* search for the field name. it's terminated by the end of the + string, or a ':' or '!' */ + field_name->ptr = str->ptr; + while (str->ptr < str->end) { + switch (c = *(str->ptr++)) { + case ':': + case '!': + break; + default: + continue; + } + break; + } + + if (c == '!' || c == ':') { + /* we have a format specifier and/or a conversion */ + /* don't include the last character */ + field_name->end = str->ptr-1; + + /* the format specifier is the rest of the string */ + format_spec->ptr = str->ptr; + format_spec->end = str->end; + + /* see if there's a conversion specifier */ + if (c == '!') { + /* there must be another character present */ + if (format_spec->ptr >= format_spec->end) { + PyErr_SetString(PyExc_ValueError, + "end of format while looking for conversion " + "specifier"); + return 0; + } + *conversion = *(format_spec->ptr++); + + /* if there is another character, it must be a colon */ + if (format_spec->ptr < format_spec->end) { + c = *(format_spec->ptr++); + if (c != ':') { + PyErr_SetString(PyExc_ValueError, + "expected ':' after format specifier"); + return 0; + } + } + } + + return 1; + + } + else { + /* end of string, there's no format_spec or conversion */ + field_name->end = str->ptr; + return 1; + } +} + +/************************************************************************/ +/******* Output string allocation and escape-to-markup processing ******/ +/************************************************************************/ + +/* MarkupIterator breaks the string into pieces of either literal + text, or things inside {} that need to be marked up. it is + designed to make it easy to wrap a Python iterator around it, for + use with the Formatter class */ + +typedef struct { + SubString str; +} MarkupIterator; + +static int +MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) +{ + SubString_init(&self->str, ptr, len); + return 1; +} + +/* returns 0 on error, 1 on non-error termination, and 2 if it got a + string (or something to be expanded) */ +static int +MarkupIterator_next(MarkupIterator *self, SubString *literal, + SubString *field_name, SubString *format_spec, + STRINGLIB_CHAR *conversion, + int *format_spec_needs_expanding) +{ + int at_end; + STRINGLIB_CHAR c = 0; + STRINGLIB_CHAR *start; + int count; + Py_ssize_t len; + int markup_follows = 0; + + /* initialize all of the output variables */ + SubString_init(literal, NULL, 0); + SubString_init(field_name, NULL, 0); + SubString_init(format_spec, NULL, 0); + *conversion = '\0'; + *format_spec_needs_expanding = 0; + + /* No more input, end of iterator. This is the normal exit + path. */ + if (self->str.ptr >= self->str.end) + return 1; + + start = self->str.ptr; + + /* First read any literal text. Read until the end of string, an + escaped '{' or '}', or an unescaped '{'. In order to never + allocate memory and so I can just pass pointers around, if + there's an escaped '{' or '}' then we'll return the literal + including the brace, but no format object. The next time + through, we'll return the rest of the literal, skipping past + the second consecutive brace. */ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + case '}': + markup_follows = 1; + break; + default: + continue; + } + break; + } + + at_end = self->str.ptr >= self->str.end; + len = self->str.ptr - start; + + if ((c == '}') && (at_end || (c != *self->str.ptr))) { + PyErr_SetString(PyExc_ValueError, "Single '}' encountered " + "in format string"); + return 0; + } + if (at_end && c == '{') { + PyErr_SetString(PyExc_ValueError, "Single '{' encountered " + "in format string"); + return 0; + } + if (!at_end) { + if (c == *self->str.ptr) { + /* escaped } or {, skip it in the input. there is no + markup object following us, just this literal text */ + self->str.ptr++; + markup_follows = 0; + } + else + len--; + } + + /* record the literal text */ + literal->ptr = start; + literal->end = start + len; + + if (!markup_follows) + return 2; + + /* this is markup, find the end of the string by counting nested + braces. note that this prohibits escaped braces, so that + format_specs cannot have braces in them. */ + count = 1; + + start = self->str.ptr; + + /* we know we can't have a zero length string, so don't worry + about that case */ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + /* the format spec needs to be recursively expanded. + this is an optimization, and not strictly needed */ + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count <= 0) { + /* we're done. parse and get out */ + SubString s; + + SubString_init(&s, start, self->str.ptr - 1 - start); + if (parse_field(&s, field_name, format_spec, conversion) == 0) + return 0; + + /* a zero length field_name is an error */ + if (field_name->ptr == field_name->end) { + PyErr_SetString(PyExc_ValueError, "zero length field name " + "in format"); + return 0; + } + + /* success */ + return 2; + } + break; + } + } + + /* end of string while searching for matching '}' */ + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); + return 0; +} + + +/* do the !r or !s conversion on obj */ +static PyObject * +do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) +{ + /* XXX in pre-3.0, do we need to convert this to unicode, since it + might have returned a string? */ + switch (conversion) { + case 'r': + return PyObject_Repr(obj); + case 's': + return STRINGLIB_TOSTR(obj); + default: + PyErr_Format(PyExc_ValueError, + "Unknown converion specifier %c", + conversion); + return NULL; + } +} + +/* given: + + {field_name!conversion:format_spec} + + compute the result and write it to output. + format_spec_needs_expanding is an optimization. if it's false, + just output the string directly, otherwise recursively expand the + format_spec string. */ + +static int +output_markup(SubString *field_name, SubString *format_spec, + int format_spec_needs_expanding, STRINGLIB_CHAR conversion, + OutputString *output, PyObject *args, PyObject *kwargs, + int recursion_depth) +{ + PyObject *tmp = NULL; + PyObject *fieldobj = NULL; + SubString expanded_format_spec; + SubString *actual_format_spec; + int result = 0; + + /* convert field_name to an object */ + fieldobj = get_field_object(field_name, args, kwargs); + if (fieldobj == NULL) + goto done; + + if (conversion != '\0') { + tmp = do_conversion(fieldobj, conversion); + if (tmp == NULL) + goto done; + + /* do the assignment, transferring ownership: fieldobj = tmp */ + Py_DECREF(fieldobj); + fieldobj = tmp; + tmp = NULL; + } + + /* if needed, recurively compute the format_spec */ + if (format_spec_needs_expanding) { + tmp = build_string(format_spec, args, kwargs, recursion_depth-1); + if (tmp == NULL) + goto done; + + /* note that in the case we're expanding the format string, + tmp must be kept around until after the call to + render_field. */ + SubString_init(&expanded_format_spec, + STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); + actual_format_spec = &expanded_format_spec; + } + else + actual_format_spec = format_spec; + + if (render_field(fieldobj, actual_format_spec, output) == 0) + goto done; + + result = 1; + +done: + Py_XDECREF(fieldobj); + Py_XDECREF(tmp); + + return result; +} + +/* + do_markup is the top-level loop for the format() method. It + searches through the format string for escapes to markup codes, and + calls other functions to move non-markup text to the output, + and to perform the markup to the output. +*/ +static int +do_markup(SubString *input, PyObject *args, PyObject *kwargs, + OutputString *output, int recursion_depth) +{ + MarkupIterator iter; + int format_spec_needs_expanding; + int result; + SubString literal; + SubString field_name; + SubString format_spec; + STRINGLIB_CHAR conversion; + + MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); + while ((result = MarkupIterator_next(&iter, &literal, &field_name, + &format_spec, &conversion, + &format_spec_needs_expanding)) == 2) { + if (!output_data(output, literal.ptr, literal.end - literal.ptr)) + return 0; + if (field_name.ptr != field_name.end) + if (!output_markup(&field_name, &format_spec, + format_spec_needs_expanding, conversion, output, + args, kwargs, recursion_depth)) + return 0; + } + return result; +} + + +/* + build_string allocates the output string and then + calls do_markup to do the heavy lifting. +*/ +static PyObject * +build_string(SubString *input, PyObject *args, PyObject *kwargs, + int recursion_depth) +{ + OutputString output; + PyObject *result = NULL; + Py_ssize_t count; + + output.obj = NULL; /* needed so cleanup code always works */ + + /* check the recursion level */ + if (recursion_depth <= 0) { + PyErr_SetString(PyExc_ValueError, + "Max string recursion exceeded"); + goto done; + } + + /* initial size is the length of the format string, plus the size + increment. seems like a reasonable default */ + if (!output_initialize(&output, + input->end - input->ptr + + INITIAL_SIZE_INCREMENT)) + goto done; + + if (!do_markup(input, args, kwargs, &output, recursion_depth)) { + goto done; + } + + count = output.ptr - STRINGLIB_STR(output.obj); + if (STRINGLIB_RESIZE(&output.obj, count) < 0) { + goto done; + } + + /* transfer ownership to result */ + result = output.obj; + output.obj = NULL; + +done: + Py_XDECREF(output.obj); + return result; +} + +/************************************************************************/ +/*********** main routine ***********************************************/ +/************************************************************************/ + +/* this is the main entry point */ +static PyObject * +do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) +{ + SubString input; + + /* PEP 3101 says only 2 levels, so that + "{0:{1}}".format('abc', 's') # works + "{0:{1:{2}}}".format('abc', 's', '') # fails + */ + int recursion_depth = 2; + + SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); + return build_string(&input, args, kwargs, recursion_depth); +} + + + +/************************************************************************/ +/*********** formatteriterator ******************************************/ +/************************************************************************/ + +/* This is used to implement string.Formatter.vparse(). It exists so + Formatter can share code with the built in unicode.format() method. + It's really just a wrapper around MarkupIterator that is callable + from Python. */ + +typedef struct { + PyObject_HEAD + + STRINGLIB_OBJECT *str; + + MarkupIterator it_markup; +} formatteriterobject; + +static void +formatteriter_dealloc(formatteriterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (literal, field_name, format_spec, conversion) + + literal is any literal text to output. might be zero length + field_name is the string before the ':'. might be None + format_spec is the string after the ':'. mibht be None + conversion is either None, or the string after the '!' +*/ +static PyObject * +formatteriter_next(formatteriterobject *it) +{ + SubString literal; + SubString field_name; + SubString format_spec; + STRINGLIB_CHAR conversion; + int format_spec_needs_expanding; + int result = MarkupIterator_next(&it->it_markup, &literal, &field_name, + &format_spec, &conversion, + &format_spec_needs_expanding); + + /* all of the SubString objects point into it->str, so no + memory management needs to be done on them */ + assert(0 <= result && result <= 2); + if (result == 0 || result == 1) + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + else { + PyObject *literal_str = NULL; + PyObject *field_name_str = NULL; + PyObject *format_spec_str = NULL; + PyObject *conversion_str = NULL; + PyObject *tuple = NULL; + int has_field = field_name.ptr != field_name.end; + + literal_str = SubString_new_object(&literal); + if (literal_str == NULL) + goto done; + + field_name_str = SubString_new_object(&field_name); + if (field_name_str == NULL) + goto done; + + /* if field_name is non-zero length, return a string for + format_spec (even if zero length), else return None */ + format_spec_str = (has_field ? + SubString_new_object_or_empty : + SubString_new_object)(&format_spec); + if (format_spec_str == NULL) + goto done; + + /* if the conversion is not specified, return a None, + otherwise create a one length string with the conversion + character */ + if (conversion == '\0') { + conversion_str = Py_None; + Py_INCREF(conversion_str); + } + else + conversion_str = STRINGLIB_NEW(&conversion, 1); + if (conversion_str == NULL) + goto done; + + tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, + conversion_str); + done: + Py_XDECREF(literal_str); + Py_XDECREF(field_name_str); + Py_XDECREF(format_spec_str); + Py_XDECREF(conversion_str); + return tuple; + } +} + +static PyMethodDef formatteriter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFormatterIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "formatteriterator", /* tp_name */ + sizeof(formatteriterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)formatteriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)formatteriter_next, /* tp_iternext */ + formatteriter_methods, /* tp_methods */ + 0, +}; + +/* unicode_formatter_parser is used to implement + string.Formatter.vformat. it parses a string and returns tuples + describing the parsed elements. It's a wrapper around + stringlib/string_format.h's MarkupIterator */ +static PyObject * +formatter_parser(STRINGLIB_OBJECT *self) +{ + formatteriterobject *it; + + it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator */ + Py_INCREF(self); + it->str = self; + + /* initialize the contained MarkupIterator */ + MarkupIterator_init(&it->it_markup, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + + return (PyObject *)it; +} + + +/************************************************************************/ +/*********** fieldnameiterator ******************************************/ +/************************************************************************/ + + +/* This is used to implement string.Formatter.vparse(). It parses the + field name into attribute and item values. It's a Python-callable + wrapper around FieldNameIterator */ + +typedef struct { + PyObject_HEAD + + STRINGLIB_OBJECT *str; + + FieldNameIterator it_field; +} fieldnameiterobject; + +static void +fieldnameiter_dealloc(fieldnameiterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (is_attr, value) + is_attr is true if we used attribute syntax (e.g., '.foo') + false if we used index syntax (e.g., '[foo]') + value is an integer or string +*/ +static PyObject * +fieldnameiter_next(fieldnameiterobject *it) +{ + int result; + int is_attr; + Py_ssize_t idx; + SubString name; + + result = FieldNameIterator_next(&it->it_field, &is_attr, + &idx, &name); + if (result == 0 || result == 1) + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + else { + PyObject* result = NULL; + PyObject* is_attr_obj = NULL; + PyObject* obj = NULL; + + is_attr_obj = PyBool_FromLong(is_attr); + if (is_attr_obj == NULL) + goto done; + + /* either an integer or a string */ + if (idx != -1) + obj = PyLong_FromSsize_t(idx); + else + obj = SubString_new_object(&name); + if (obj == NULL) + goto done; + + /* return a tuple of values */ + result = PyTuple_Pack(2, is_attr_obj, obj); + + done: + Py_XDECREF(is_attr_obj); + Py_XDECREF(obj); + return result; + } +} + +static PyMethodDef fieldnameiter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFieldNameIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "fieldnameiterator", /* tp_name */ + sizeof(fieldnameiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)fieldnameiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)fieldnameiter_next, /* tp_iternext */ + fieldnameiter_methods, /* tp_methods */ + 0}; + +/* unicode_formatter_field_name_split is used to implement + string.Formatter.vformat. it takes an PEP 3101 "field name", and + returns a tuple of (first, rest): "first", the part before the + first '.' or '['; and "rest", an iterator for the rest of the field + name. it's a wrapper around stringlib/string_format.h's + field_name_split. The iterator it returns is a + FieldNameIterator */ +static PyObject * +formatter_field_name_split(STRINGLIB_OBJECT *self) +{ + SubString first; + Py_ssize_t first_idx; + fieldnameiterobject *it; + + PyObject *first_obj = NULL; + PyObject *result = NULL; + + it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator. this is + just to keep the field_name alive */ + Py_INCREF(self); + it->str = self; + + if (!field_name_split(STRINGLIB_STR(self), + STRINGLIB_LEN(self), + &first, &first_idx, &it->it_field)) + goto done; + + /* first becomes an integer, if possible; else a string */ + if (first_idx != -1) + first_obj = PyLong_FromSsize_t(first_idx); + else + /* convert "first" into a string object */ + first_obj = SubString_new_object(&first); + if (first_obj == NULL) + goto done; + + /* return a tuple of values */ + result = PyTuple_Pack(2, first_obj, it); + +done: + Py_XDECREF(it); + Py_XDECREF(first_obj); + return result; +} diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h new file mode 100644 index 0000000..1e0df0f --- /dev/null +++ b/Objects/stringlib/stringdefs.h @@ -0,0 +1,27 @@ +#ifndef STRINGLIB_STRINGDEFS_H +#define STRINGLIB_STRINGDEFS_H + +/* this is sort of a hack. there's at least one place (formatting + floats) where some stringlib code takes a different path if it's + compiled as unicode. */ +#define STRINGLIB_IS_UNICODE 0 + +#define STRINGLIB_OBJECT PyStringObject +#define STRINGLIB_CHAR char +#define STRINGLIB_TYPE_NAME "string" +#define STRINGLIB_PARSE_CODE "S" +#define STRINGLIB_EMPTY nullstring +#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) +#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) +#define STRINGLIB_TOUPPER toupper +#define STRINGLIB_TOLOWER tolower +#define STRINGLIB_FILL memset +#define STRINGLIB_STR PyString_AS_STRING +#define STRINGLIB_LEN PyString_GET_SIZE +#define STRINGLIB_NEW PyString_FromStringAndSize +#define STRINGLIB_RESIZE _PyString_Resize +#define STRINGLIB_CHECK PyString_Check +#define STRINGLIB_CMP memcmp +#define STRINGLIB_TOSTR PyObject_Str + +#endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h new file mode 100644 index 0000000..f402a98 --- /dev/null +++ b/Objects/stringlib/unicodedefs.h @@ -0,0 +1,52 @@ +#ifndef STRINGLIB_UNICODEDEFS_H +#define STRINGLIB_UNICODEDEFS_H + +/* this is sort of a hack. there's at least one place (formatting + floats) where some stringlib code takes a different path if it's + compiled as unicode. */ +#define STRINGLIB_IS_UNICODE 1 + +#define STRINGLIB_OBJECT PyUnicodeObject +#define STRINGLIB_CHAR Py_UNICODE +#define STRINGLIB_TYPE_NAME "unicode" +#define STRINGLIB_PARSE_CODE "U" +#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL +#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL +#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER +#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER +#define STRINGLIB_FILL Py_UNICODE_FILL +#define STRINGLIB_STR PyUnicode_AS_UNICODE +#define STRINGLIB_LEN PyUnicode_GET_SIZE +#define STRINGLIB_NEW PyUnicode_FromUnicode +#define STRINGLIB_RESIZE PyUnicode_Resize +#define STRINGLIB_CHECK PyUnicode_Check + +#if PY_VERSION_HEX < 0x03000000 +#define STRINGLIB_TOSTR PyObject_Unicode +#else +#define STRINGLIB_TOSTR PyObject_Str +#endif + +#define STRINGLIB_WANT_CONTAINS_OBJ 1 + +/* STRINGLIB_CMP was defined as: + +Py_LOCAL_INLINE(int) +STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) +{ + if (str[0] != other[0]) + return 1; + return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE)); +} + +but unfortunately that gives a error if the function isn't used in a file that +includes this file. So, reluctantly convert it to a macro instead. */ + +#define STRINGLIB_CMP(str, other, len) \ + (((str)[0] != (other)[0]) ? \ + 1 : \ + memcmp((void*) (str), (void*) (other), (len) * sizeof(Py_UNICODE))) + + +#endif /* !STRINGLIB_UNICODEDEFS_H */ diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 21f59ac..392da93 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -4,6 +4,8 @@ #include "Python.h" +#include "formatter_string.h" + #include <ctype.h> #ifdef COUNT_ALLOCS @@ -771,15 +773,7 @@ PyString_AsStringAndSize(register PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ -#define STRINGLIB_CHAR char - -#define STRINGLIB_CMP memcmp -#define STRINGLIB_LEN PyString_GET_SIZE -#define STRINGLIB_NEW PyString_FromStringAndSize -#define STRINGLIB_STR PyString_AS_STRING - -#define STRINGLIB_EMPTY nullstring - +#include "stringlib/stringdefs.h" #include "stringlib/fastsearch.h" #include "stringlib/count.h" @@ -3910,6 +3904,19 @@ string_getnewargs(PyStringObject *v) return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); } + +#include "stringlib/string_format.h" + +PyDoc_STRVAR(format__doc__, +"S.format(*args, **kwargs) -> unicode\n\ +\n\ +"); + +PyDoc_STRVAR(p_format__doc__, +"S.__format__(format_spec) -> unicode\n\ +\n\ +"); + static PyMethodDef string_methods[] = { @@ -3954,6 +3961,10 @@ string_methods[] = { {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, + {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, + {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, + {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, + {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__}, {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__}, {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 073ee31..07ab61f 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3210,11 +3210,57 @@ object_reduce_ex(PyObject *self, PyObject *args) return _common_reduce(self, proto); } +/* + from PEP 3101, this code implements: + + class object: + def __format__(self, format_spec): + if isinstance(format_spec, str): + return format(str(self), format_spec) + elif isinstance(format_spec, unicode): + return format(unicode(self), format_spec) +*/ +static PyObject * +object_format(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + PyObject *self_as_str = NULL; + PyObject *result = NULL; + PyObject *format_meth = NULL; + + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + return NULL; + if (PyUnicode_Check(format_spec)) { + self_as_str = PyObject_Unicode(self); + } else if (PyString_Check(format_spec)) { + self_as_str = PyObject_Str(self); + } else { + PyErr_SetString(PyExc_TypeError, "argument to __format__ must be unicode or str"); + return NULL; + } + + if (self_as_str != NULL) { + /* find the format function */ + format_meth = PyObject_GetAttrString(self_as_str, "__format__"); + if (format_meth != NULL) { + /* and call it */ + result = PyObject_CallFunctionObjArgs(format_meth, format_spec, NULL); + } + } + + Py_XDECREF(self_as_str); + Py_XDECREF(format_meth); + + return result; +} + static PyMethodDef object_methods[] = { {"__reduce_ex__", object_reduce_ex, METH_VARARGS, PyDoc_STR("helper for pickle")}, {"__reduce__", object_reduce, METH_VARARGS, PyDoc_STR("helper for pickle")}, + {"__format__", object_format, METH_VARARGS, + PyDoc_STR("default object formatter")}, {0} }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3e15f53..0dca976 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -42,6 +42,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "formatter_unicode.h" + #include "unicodeobject.h" #include "ucnhash.h" @@ -5059,21 +5061,8 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, /* --- Helpers ------------------------------------------------------------ */ -#define STRINGLIB_CHAR Py_UNICODE - -#define STRINGLIB_LEN PyUnicode_GET_SIZE -#define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_STR PyUnicode_AS_UNICODE - -Py_LOCAL_INLINE(int) -STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) -{ - if (str[0] != other[0]) - return 1; - return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE)); -} +#include "stringlib/unicodedefs.h" -#define STRINGLIB_EMPTY unicode_empty #define FROM_UNICODE #include "stringlib/fastsearch.h" @@ -7802,6 +7791,19 @@ unicode_endswith(PyUnicodeObject *self, } +/* Implements do_string_format, which is unicode because of stringlib */ +#include "stringlib/string_format.h" + +PyDoc_STRVAR(format__doc__, +"S.format(*args, **kwargs) -> unicode\n\ +\n\ +"); + +PyDoc_STRVAR(p_format__doc__, +"S.__format__(format_spec) -> unicode\n\ +\n\ +"); + static PyObject * unicode_getnewargs(PyUnicodeObject *v) @@ -7855,6 +7857,10 @@ static PyMethodDef unicode_methods[] = { {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, + {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, + {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, + {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, + {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, #if 0 {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, #endif diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index fe691fc..a9d68ca 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -339,6 +339,24 @@ PyDoc_STRVAR(filter_doc, "or string, return the same type, else return a list."); static PyObject * +builtin_format(PyObject *self, PyObject *args) +{ + PyObject *value; + PyObject *format_spec = NULL; + + if (!PyArg_ParseTuple(args, "O|O:format", &value, &format_spec)) + return NULL; + + return PyObject_Format(value, format_spec); +} + +PyDoc_STRVAR(format_doc, +"format(value[, format_spec]) -> string\n\ +\n\ +Returns value.__format__(format_spec)\n\ +format_spec defaults to \"\""); + +static PyObject * builtin_chr(PyObject *self, PyObject *args) { long x; @@ -2359,6 +2377,7 @@ static PyMethodDef builtin_methods[] = { {"eval", builtin_eval, METH_VARARGS, eval_doc}, {"execfile", builtin_execfile, METH_VARARGS, execfile_doc}, {"filter", builtin_filter, METH_VARARGS, filter_doc}, + {"format", builtin_format, METH_VARARGS, format_doc}, {"getattr", builtin_getattr, METH_VARARGS, getattr_doc}, {"globals", (PyCFunction)builtin_globals, METH_NOARGS, globals_doc}, {"hasattr", builtin_hasattr, METH_VARARGS, hasattr_doc}, diff --git a/Python/formatter_string.c b/Python/formatter_string.c new file mode 100644 index 0000000..1041852 --- /dev/null +++ b/Python/formatter_string.c @@ -0,0 +1,15 @@ +/***********************************************************************/ +/* Implements the string (as opposed to unicode) version of the + built-in formatters for string, int, float. That is, the versions + of int.__float__, etc., that take and return string objects */ + +#include "Python.h" +#include "formatter_string.h" + +#include "../Objects/stringlib/stringdefs.h" + +#define FORMAT_STRING string__format__ +#define FORMAT_LONG string_long__format__ +#define FORMAT_INT string_int__format__ +#define FORMAT_FLOAT string_float__format__ +#include "../Objects/stringlib/formatter.h" diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c new file mode 100644 index 0000000..17c6944 --- /dev/null +++ b/Python/formatter_unicode.c @@ -0,0 +1,13 @@ +/* Implements the unicode (as opposed to string) version of the + built-in formatter for unicode. That is, unicode.__format__(). */ + +#include "Python.h" +#include "formatter_unicode.h" + +#include "../Objects/stringlib/unicodedefs.h" + +#define FORMAT_STRING unicode__format__ +/* don't define FORMAT_LONG and FORMAT_FLOAT, since we can live + with only the string versions of those. The builtin format() + will convert them to unicode. */ +#include "../Objects/stringlib/formatter.h" |