summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_bigmem.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_bigmem.py')
-rw-r--r--Lib/test/test_bigmem.py223
1 files changed, 148 insertions, 75 deletions
diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py
index f3c6ebb..0e54595 100644
--- a/Lib/test/test_bigmem.py
+++ b/Lib/test/test_bigmem.py
@@ -1,3 +1,13 @@
+"""Bigmem tests - tests for the 32-bit boundary in containers.
+
+These tests try to exercise the 32-bit boundary that is sometimes, if
+rarely, exceeded in practice, but almost never tested. They are really only
+meaningful on 64-bit builds on machines with a *lot* of memory, but the
+tests are always run, usually with very low memory limits to make sure the
+tests themselves don't suffer from bitrot. To run them for real, pass a
+high memory limit to regrtest, with the -M option.
+"""
+
from test import support
from test.support import bigmemtest, _1G, _2G, _4G
@@ -6,20 +16,35 @@ import operator
import sys
import functools
+# These tests all use one of the bigmemtest decorators to indicate how much
+# memory they use and how much memory they need to be even meaningful. The
+# decorators take two arguments: a 'memuse' indicator declaring
+# (approximate) bytes per size-unit the test will use (at peak usage), and a
+# 'minsize' indicator declaring a minimum *useful* size. A test that
+# allocates a bytestring to test various operations near the end will have a
+# minsize of at least 2Gb (or it wouldn't reach the 32-bit limit, so the
+# test wouldn't be very useful) and a memuse of 1 (one byte per size-unit,
+# if it allocates only one big string at a time.)
+#
+# When run with a memory limit set, both decorators skip tests that need
+# more memory than available to be meaningful. The precisionbigmemtest will
+# always pass minsize as size, even if there is much more memory available.
+# The bigmemtest decorator will scale size upward to fill available memory.
+#
# Bigmem testing houserules:
#
# - Try not to allocate too many large objects. It's okay to rely on
-# refcounting semantics, but don't forget that 's = create_largestring()'
+# refcounting semantics, and don't forget that 's = create_largestring()'
# doesn't release the old 's' (if it exists) until well after its new
# value has been created. Use 'del s' before the create_largestring call.
#
-# - Do *not* compare large objects using assertEqual or similar. It's a
-# lengthy operation and the errormessage will be utterly useless due to
-# its size. To make sure whether a result has the right contents, better
-# to use the strip or count methods, or compare meaningful slices.
+# - Do *not* compare large objects using assertEqual, assertIn or similar.
+# It's a lengthy operation and the errormessage will be utterly useless
+# due to its size. To make sure whether a result has the right contents,
+# better to use the strip or count methods, or compare meaningful slices.
#
# - Don't forget to test for large indices, offsets and results and such,
-# in addition to large sizes.
+# in addition to large sizes. Anything that probes the 32-bit boundary.
#
# - When repeating an object (say, a substring, or a small list) to create
# a large object, make the subobject of a length that is not a power of
@@ -37,13 +62,14 @@ import functools
# fail as well. I do not know whether it is due to memory fragmentation
# issues, or other specifics of the platform malloc() routine.
-character_size = 4 if sys.maxunicode > 0xFFFF else 2
+ascii_char_size = 1
+ucs2_char_size = 2
+ucs4_char_size = 4
class BaseStrTest:
- @bigmemtest(size=_2G, memuse=2)
- def test_capitalize(self, size):
+ def _test_capitalize(self, size):
_ = self.from_latin1
SUBSTR = self.from_latin1(' abc def ghi')
s = _('-') * size + SUBSTR
@@ -92,7 +118,7 @@ class BaseStrTest:
_ = self.from_latin1
s = _('-') * size
tabsize = 8
- self.assertEqual(s.expandtabs(), s)
+ self.assertTrue(s.expandtabs() == s)
del s
slen, remainder = divmod(size, tabsize)
s = _(' \t') * slen
@@ -347,7 +373,7 @@ class BaseStrTest:
# suffer for the list size. (Otherwise, it'd cost another 48 times
# size in bytes!) Nevertheless, a list of size takes
# 8*size bytes.
- @bigmemtest(size=_2G + 5, memuse=10)
+ @bigmemtest(size=_2G + 5, memuse=2 * ascii_char_size + 8)
def test_split_large(self, size):
_ = self.from_latin1
s = _(' a') * size + _(' ')
@@ -366,9 +392,9 @@ class BaseStrTest:
# take up an inordinate amount of memory
chunksize = int(size ** 0.5 + 2) // 2
SUBSTR = _(' ') * chunksize + _('\n') + _(' ') * chunksize + _('\r\n')
- s = SUBSTR * chunksize
+ s = SUBSTR * (chunksize * 2)
l = s.splitlines()
- self.assertEqual(len(l), chunksize * 2)
+ self.assertEqual(len(l), chunksize * 4)
expected = _(' ') * chunksize
for item in l:
self.assertEqual(item, expected)
@@ -394,8 +420,7 @@ class BaseStrTest:
self.assertEqual(len(s), size)
self.assertEqual(s.strip(), SUBSTR.strip())
- @bigmemtest(size=_2G, memuse=2)
- def test_swapcase(self, size):
+ def _test_swapcase(self, size):
_ = self.from_latin1
SUBSTR = _("aBcDeFG12.'\xa9\x00")
sublen = len(SUBSTR)
@@ -406,8 +431,7 @@ class BaseStrTest:
self.assertEqual(s[:sublen * 3], SUBSTR.swapcase() * 3)
self.assertEqual(s[-sublen * 3:], SUBSTR.swapcase() * 3)
- @bigmemtest(size=_2G, memuse=2)
- def test_title(self, size):
+ def _test_title(self, size):
_ = self.from_latin1
SUBSTR = _('SpaaHAaaAaham')
s = SUBSTR * (size // len(SUBSTR) + 2)
@@ -419,14 +443,7 @@ class BaseStrTest:
def test_translate(self, size):
_ = self.from_latin1
SUBSTR = _('aZz.z.Aaz.')
- if isinstance(SUBSTR, str):
- trans = {
- ord(_('.')): _('-'),
- ord(_('a')): _('!'),
- ord(_('Z')): _('$'),
- }
- else:
- trans = bytes.maketrans(b'.aZ', b'-!$')
+ trans = bytes.maketrans(b'.aZ', b'-!$')
sublen = len(SUBSTR)
repeats = size // sublen + 2
s = SUBSTR * repeats
@@ -519,19 +536,19 @@ class BaseStrTest:
edge = _('-') * (size // 2)
s = _('').join([edge, SUBSTR, edge])
del edge
- self.assertIn(SUBSTR, s)
- self.assertNotIn(SUBSTR * 2, s)
- self.assertIn(_('-'), s)
- self.assertNotIn(_('a'), s)
+ self.assertTrue(SUBSTR in s)
+ self.assertFalse(SUBSTR * 2 in s)
+ self.assertTrue(_('-') in s)
+ self.assertFalse(_('a') in s)
s += _('a')
- self.assertIn(_('a'), s)
+ self.assertTrue(_('a') in s)
@bigmemtest(size=_2G + 10, memuse=2)
def test_compare(self, size):
_ = self.from_latin1
s1 = _('-') * size
s2 = _('-') * size
- self.assertEqual(s1, s2)
+ self.assertTrue(s1 == s2)
del s2
s2 = s1 + _('a')
self.assertFalse(s1 == s2)
@@ -552,7 +569,7 @@ class BaseStrTest:
h1 = hash(s)
del s
s = _('\x00') * (size + 1)
- self.assertFalse(h1 == hash(s))
+ self.assertNotEqual(h1, hash(s))
class StrTest(unittest.TestCase, BaseStrTest):
@@ -563,7 +580,6 @@ class StrTest(unittest.TestCase, BaseStrTest):
def basic_encode_test(self, size, enc, c='.', expectedsize=None):
if expectedsize is None:
expectedsize = size
-
try:
s = c * size
self.assertEqual(len(s.encode(enc)), expectedsize)
@@ -582,48 +598,64 @@ class StrTest(unittest.TestCase, BaseStrTest):
memuse = meth.memuse
except AttributeError:
continue
- meth.memuse = character_size * memuse
+ meth.memuse = ascii_char_size * memuse
self._adjusted[name] = memuse
def tearDown(self):
for name, memuse in self._adjusted.items():
getattr(type(self), name).memuse = memuse
- # the utf8 encoder preallocates big time (4x the number of characters)
- @bigmemtest(size=_2G + 2, memuse=character_size + 4)
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
+ # Many codecs convert to the legacy representation first, explaining
+ # why we add 'ucs4_char_size' to the 'memuse' below.
+
+ @bigmemtest(size=_2G + 2, memuse=ascii_char_size + 1)
def test_encode(self, size):
return self.basic_encode_test(size, 'utf-8')
- @bigmemtest(size=_4G // 6 + 2, memuse=character_size + 1)
+ @bigmemtest(size=_4G // 6 + 2, memuse=ascii_char_size + ucs4_char_size + 1)
def test_encode_raw_unicode_escape(self, size):
try:
return self.basic_encode_test(size, 'raw_unicode_escape')
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_4G // 5 + 70, memuse=character_size + 1)
+ @bigmemtest(size=_4G // 5 + 70, memuse=ascii_char_size + ucs4_char_size + 1)
def test_encode_utf7(self, size):
try:
return self.basic_encode_test(size, 'utf7')
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_4G // 4 + 5, memuse=character_size + 4)
+ @bigmemtest(size=_4G // 4 + 5, memuse=ascii_char_size + ucs4_char_size + 4)
def test_encode_utf32(self, size):
try:
- return self.basic_encode_test(size, 'utf32', expectedsize=4*size+4)
+ return self.basic_encode_test(size, 'utf32', expectedsize=4 * size + 4)
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_2G - 1, memuse=character_size + 1)
+ @bigmemtest(size=_2G - 1, memuse=ascii_char_size + 1)
def test_encode_ascii(self, size):
return self.basic_encode_test(size, 'ascii', c='A')
- @bigmemtest(size=_2G + 10, memuse=character_size * 2)
+ # str % (...) uses a Py_UCS4 intermediate representation
+
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 2 + ucs4_char_size)
def test_format(self, size):
s = '-' * size
sf = '%s' % (s,)
- self.assertEqual(s, sf)
+ self.assertTrue(s == sf)
del sf
sf = '..%s..' % (s,)
self.assertEqual(len(sf), len(s) + 4)
@@ -640,7 +672,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('.'), 3)
self.assertEqual(s.count('-'), size * 2)
- @bigmemtest(size=_2G + 10, memuse=character_size * 2)
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 2)
def test_repr_small(self, size):
s = '-' * size
s = repr(s)
@@ -661,7 +693,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('\\'), size)
self.assertEqual(s.count('0'), size * 2)
- @bigmemtest(size=_2G + 10, memuse=character_size * 5)
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 5)
def test_repr_large(self, size):
s = '\x00' * size
s = repr(s)
@@ -671,7 +703,13 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('\\'), size)
self.assertEqual(s.count('0'), size * 2)
- @bigmemtest(size=_2G // 5 + 1, memuse=character_size * 7)
+ # ascii() calls encode('ascii', 'backslashreplace'), which itself
+ # creates a temporary Py_UNICODE representation in addition to the
+ # original (Py_UCS2) one
+ # There's also some overallocation when resizing the ascii() result
+ # that isn't taken into account here.
+ @bigmemtest(size=_2G // 5 + 1, memuse=ucs2_char_size +
+ ucs4_char_size + ascii_char_size * 6)
def test_unicode_repr(self, size):
# Use an assigned, but not printable code point.
# It is in the range of the low surrogates \uDC00-\uDFFF.
@@ -686,9 +724,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
finally:
r = s = None
- # The character takes 4 bytes even in UCS-2 builds because it will
- # be decomposed into surrogates.
- @bigmemtest(size=_2G // 5 + 1, memuse=4 + character_size * 9)
+ @bigmemtest(size=_2G // 5 + 1, memuse=ucs4_char_size * 2 + ascii_char_size * 10)
def test_unicode_repr_wide(self, size):
char = "\U0001DCBA"
s = char * size
@@ -701,39 +737,76 @@ class StrTest(unittest.TestCase, BaseStrTest):
finally:
r = s = None
- @bigmemtest(size=_4G // 5, memuse=character_size * (6 + 1))
- def _test_unicode_repr_overflow(self, size):
- # XXX not sure what this test is about
- char = "\uDCBA"
- s = char * size
- try:
- r = repr(s)
- self.assertTrue(s == eval(r))
- finally:
- r = s = None
+ # The original test_translate is overriden here, so as to get the
+ # correct size estimate: str.translate() uses an intermediate Py_UCS4
+ # representation.
+
+ @bigmemtest(size=_2G, memuse=ascii_char_size * 2 + ucs4_char_size)
+ def test_translate(self, size):
+ _ = self.from_latin1
+ SUBSTR = _('aZz.z.Aaz.')
+ trans = {
+ ord(_('.')): _('-'),
+ ord(_('a')): _('!'),
+ ord(_('Z')): _('$'),
+ }
+ sublen = len(SUBSTR)
+ repeats = size // sublen + 2
+ s = SUBSTR * repeats
+ s = s.translate(trans)
+ self.assertEqual(len(s), repeats * sublen)
+ self.assertEqual(s[:sublen], SUBSTR.translate(trans))
+ self.assertEqual(s[-sublen:], SUBSTR.translate(trans))
+ self.assertEqual(s.count(_('.')), 0)
+ self.assertEqual(s.count(_('!')), repeats * 2)
+ self.assertEqual(s.count(_('z')), repeats * 3)
class BytesTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return s.encode("latin1")
+ return s.encode("latin-1")
- @bigmemtest(size=_2G + 2, memuse=1 + character_size)
+ @bigmemtest(size=_2G + 2, memuse=1 + ascii_char_size)
def test_decode(self, size):
s = self.from_latin1('.') * size
self.assertEqual(len(s.decode('utf-8')), size)
+ @bigmemtest(size=_2G, memuse=2)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
class BytearrayTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return bytearray(s.encode("latin1"))
+ return bytearray(s.encode("latin-1"))
- @bigmemtest(size=_2G + 2, memuse=1 + character_size)
+ @bigmemtest(size=_2G + 2, memuse=1 + ascii_char_size)
def test_decode(self, size):
s = self.from_latin1('.') * size
self.assertEqual(len(s.decode('utf-8')), size)
+ @bigmemtest(size=_2G, memuse=2)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
test_hash = None
test_split_large = None
@@ -752,7 +825,7 @@ class TupleTest(unittest.TestCase):
def test_compare(self, size):
t1 = ('',) * size
t2 = ('',) * size
- self.assertEqual(t1, t2)
+ self.assertTrue(t1 == t2)
del t2
t2 = ('',) * (size + 1)
self.assertFalse(t1 == t2)
@@ -783,9 +856,9 @@ class TupleTest(unittest.TestCase):
def test_contains(self, size):
t = (1, 2, 3, 4, 5) * size
self.assertEqual(len(t), size * 5)
- self.assertIn(5, t)
- self.assertNotIn((1, 2, 3, 4, 5), t)
- self.assertNotIn(0, t)
+ self.assertTrue(5 in t)
+ self.assertFalse((1, 2, 3, 4, 5) in t)
+ self.assertFalse(0 in t)
@bigmemtest(size=_2G + 10, memuse=8)
def test_hash(self, size):
@@ -869,11 +942,11 @@ class TupleTest(unittest.TestCase):
self.assertEqual(s[-5:], '0, 0)')
self.assertEqual(s.count('0'), size)
- @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_small(self, size):
return self.basic_test_repr(size)
- @bigmemtest(size=_2G + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_large(self, size):
return self.basic_test_repr(size)
@@ -888,7 +961,7 @@ class ListTest(unittest.TestCase):
def test_compare(self, size):
l1 = [''] * size
l2 = [''] * size
- self.assertEqual(l1, l2)
+ self.assertTrue(l1 == l2)
del l2
l2 = [''] * (size + 1)
self.assertFalse(l1 == l2)
@@ -934,9 +1007,9 @@ class ListTest(unittest.TestCase):
def test_contains(self, size):
l = [1, 2, 3, 4, 5] * size
self.assertEqual(len(l), size * 5)
- self.assertIn(5, l)
- self.assertNotIn([1, 2, 3, 4, 5], l)
- self.assertNotIn(0, l)
+ self.assertTrue(5 in l)
+ self.assertFalse([1, 2, 3, 4, 5] in l)
+ self.assertFalse(0 in l)
@bigmemtest(size=_2G + 10, memuse=8)
def test_hash(self, size):
@@ -1044,11 +1117,11 @@ class ListTest(unittest.TestCase):
self.assertEqual(s[-5:], '0, 0]')
self.assertEqual(s.count('0'), size)
- @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_small(self, size):
return self.basic_test_repr(size)
- @bigmemtest(size=_2G + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_large(self, size):
return self.basic_test_repr(size)