summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-01-27 20:18:34 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-01-27 20:18:34 (GMT)
commit3f95292be69ac09ed173e4241d220d30b1b059ff (patch)
treecb02282df238300dcc32084e57e49cf4d8fab87a /Lib
parentf18bf6fd2d2f0d7db6a5e5b4d86b709dd2b5ce6d (diff)
downloadcpython-3f95292be69ac09ed173e4241d220d30b1b059ff.zip
cpython-3f95292be69ac09ed173e4241d220d30b1b059ff.tar.gz
cpython-3f95292be69ac09ed173e4241d220d30b1b059ff.tar.bz2
Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/test_unicode.py145
1 files changed, 129 insertions, 16 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 47af8b9..21005eb 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTest,
# Test PyUnicode_FromFormat()
def test_from_format(self):
support.import_module('ctypes')
- from ctypes import pythonapi, py_object, c_int
+ from ctypes import (
+ pythonapi, py_object, sizeof,
+ c_int, c_long, c_longlong, c_ssize_t,
+ c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
if sys.maxunicode == 65535:
name = "PyUnicodeUCS2_FromFormat"
else:
@@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTest,
for arg in args)
return _PyUnicode_FromFormat(format, *cargs)
+ def check_format(expected, format, *args):
+ text = PyUnicode_FromFormat(format, *args)
+ self.assertEqual(expected, text)
+
# ascii format, non-ascii argument
- text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
- self.assertEqual(text, 'ascii\x7f=unicode\xe9')
+ check_format('ascii\x7f=unicode\xe9',
+ b'ascii\x7f=%U', 'unicode\xe9')
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
# raises an error
@@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTest,
'string, got a non-ASCII byte: 0xe9$',
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
-
- # other tests
- text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
- self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
-
- text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
- self.assertEqual(text, 'repr=abc')
+ # test "%c"
+ check_format('\uabcd',
+ b'%c', c_int(0xabcd))
+ check_format('\U0010ffff',
+ b'%c', c_int(0x10ffff))
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x110000))
+ # Issue #18183
+ check_format('\U00010000\U00100000',
+ b'%c%c', c_int(0x10000), c_int(0x100000))
+
+ # test "%"
+ check_format('%',
+ b'%')
+ check_format('%',
+ b'%%')
+ check_format('%s',
+ b'%%s')
+ check_format('[%]',
+ b'[%%]')
+ check_format('%abc',
+ b'%%%s', b'abc')
+
+ # test %S
+ check_format("repr=\u20acABC",
+ b'repr=%S', '\u20acABC')
+
+ # test %R
+ check_format("repr='\u20acABC'",
+ b'repr=%R', '\u20acABC')
+
+ # test integer formats (%i, %d, %u)
+ check_format('010',
+ b'%03i', c_int(10))
+ check_format('0010',
+ b'%0.4i', c_int(10))
+ check_format('-123',
+ b'%i', c_int(-123))
+
+ check_format('-123',
+ b'%d', c_int(-123))
+ check_format('-123',
+ b'%ld', c_long(-123))
+ check_format('-123',
+ b'%lld', c_longlong(-123))
+ check_format('-123',
+ b'%zd', c_ssize_t(-123))
+
+ check_format('123',
+ b'%u', c_uint(123))
+ check_format('123',
+ b'%lu', c_ulong(123))
+ check_format('123',
+ b'%llu', c_ulonglong(123))
+ check_format('123',
+ b'%zu', c_size_t(123))
+
+ # test long output
+ min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
+ max_longlong = -min_longlong - 1
+ check_format(str(min_longlong),
+ b'%lld', c_longlong(min_longlong))
+ check_format(str(max_longlong),
+ b'%lld', c_longlong(max_longlong))
+ max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
+ check_format(str(max_ulonglong),
+ b'%llu', c_ulonglong(max_ulonglong))
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+ # test padding (width and/or precision)
+ check_format('123'.rjust(10, '0'),
+ b'%010i', c_int(123))
+ check_format('123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100i', c_int(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010u', c_uint(123))
+ check_format('123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100u', c_uint(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010x', c_int(0x123))
+ check_format('123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100x', c_int(0x123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80x', c_int(0x123))
+
+ # test %A
+ check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
+ b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
+
+ # test %V
+ check_format('repr=abc',
+ b'repr=%V', 'abc', b'xyz')
# Test string decode from parameter of %s using utf-8.
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
# '\u4eba\u6c11'
- text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
- self.assertEqual(text, 'repr=\u4eba\u6c11')
+ check_format('repr=\u4eba\u6c11',
+ b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
#Test replace error handler.
- text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
- self.assertEqual(text, 'repr=abc\ufffd')
+ check_format('repr=abc\ufffd',
+ b'repr=%V', None, b'abc\xff')
+
+ # not supported: copy the raw format string. these tests are just here
+ # to check for crashs and should not be considered as specifications
+ check_format('%s',
+ b'%1%s', b'abc')
+ check_format('%1abc',
+ b'%1abc')
+ check_format('%+i',
+ b'%+i', c_int(10))
+ check_format('%s',
+ b'%.%s', b'abc')
# Test PyUnicode_AsWideChar()
def test_aswidechar(self):