diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2009-03-07 23:40:49 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2009-03-07 23:40:49 (GMT) |
commit | 7cdb495e90370641f83e70d31d53c4c994672bc2 (patch) | |
tree | e625fa282c38ca6252f5adb9edfe6c7dc23e8e91 | |
parent | d173589c9de6396221d401a66b69b439979e69f0 (diff) | |
download | cpython-7cdb495e90370641f83e70d31d53c4c994672bc2.zip cpython-7cdb495e90370641f83e70d31d53c4c994672bc2.tar.gz cpython-7cdb495e90370641f83e70d31d53c4c994672bc2.tar.bz2 |
Issue #3700: make test_bigmem py3k-compatible, and add bytes/bytearray tests
(please note: some of the tests /do/ fail when run with a sufficiently large memory parameter (-M),
perhaps because of the str/bytes/bytearray overhaul)
-rw-r--r-- | Lib/test/support.py | 7 |
-rw-r--r-- | Lib/test/test_bigmem.py | 666 |
2 files changed, 395 insertions, 278 deletions
diff --git a/Lib/test/support.py b/Lib/test/support.py index 68791d5..0fa9d6b 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -609,6 +609,10 @@ def bigmemtest(minsize, memuse, overhead=5*_1M): """ def decorator(f): def wrapper(self): + # Retrieve values in case someone decided to adjust them + minsize = wrapper.minsize + memuse = wrapper.memuse + overhead = wrapper.overhead if not max_memuse: # If max_memuse is 0 (the default), # we still want to run the tests with size set to a few kb, @@ -636,6 +640,9 @@ def bigmemtest(minsize, memuse, overhead=5*_1M): def precisionbigmemtest(size, memuse, overhead=5*_1M): def decorator(f): def wrapper(self): + size = wrapper.size + memuse = wrapper.memuse + overhead = wrapper.overhead if not real_max_memuse: maxsize = 5147 else: diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index 5edc139..254c21d 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -4,6 +4,7 @@ from test.support import bigmemtest, _1G, _2G, _4G, precisionbigmemtest import unittest import operator import sys +import functools # Bigmem testing houserules: # @@ -13,7 +14,7 @@ import sys # value has been created. Use 'del s' before the create_largestring call. # # - Do *not* compare large objects using assertEquals or similar. It's a -# lengty operation and the errormessage will be utterly useless due to +# lengthy operation and the errormessage will be utterly useless due to # its size. To make sure whether a result has the right contents, better # to use the strip or count methods, or compare meaningful slices. # @@ -32,19 +33,24 @@ import sys # memuse-per-size should remain sane (less than a few thousand); if your # test uses more, adjust 'size' upward, instead. 
-class StrTest(unittest.TestCase): +character_size = 4 if sys.maxunicode > 0xFFFF else 2 + + +class BaseStrTest: + @bigmemtest(minsize=_2G, memuse=2) def test_capitalize(self, size): - SUBSTR = ' abc def ghi' - s = '-' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = self.from_latin1(' abc def ghi') + s = _('-') * size + SUBSTR caps = s.capitalize() self.assertEquals(caps[-len(SUBSTR):], SUBSTR.capitalize()) - self.assertEquals(caps.lstrip('-'), SUBSTR) + self.assertEquals(caps.lstrip(_('-')), SUBSTR) @bigmemtest(minsize=_2G + 10, memuse=1) def test_center(self, size): - SUBSTR = ' abc def ghi' + SUBSTR = self.from_latin1(' abc def ghi') s = SUBSTR.center(size) self.assertEquals(len(s), size) lpadsize = rpadsize = (len(s) - len(SUBSTR)) // 2 @@ -53,318 +59,283 @@ class StrTest(unittest.TestCase): self.assertEquals(s[lpadsize:-rpadsize], SUBSTR) self.assertEquals(s.strip(), SUBSTR.strip()) - @precisionbigmemtest(size=_2G - 1, memuse=1) - def test_center_unicode(self, size): - SUBSTR = ' abc def ghi' - try: - s = SUBSTR.center(size) - except OverflowError: - pass # acceptable on 32-bit - else: - self.assertEquals(len(s), size) - lpadsize = rpadsize = (len(s) - len(SUBSTR)) // 2 - if len(s) % 2: - lpadsize += 1 - self.assertEquals(s[lpadsize:-rpadsize], SUBSTR) - self.assertEquals(s.strip(), SUBSTR.strip()) - del s - @bigmemtest(minsize=_2G, memuse=2) def test_count(self, size): - SUBSTR = ' abc def ghi' - s = '.' * size + SUBSTR - self.assertEquals(s.count('.'), size) - s += '.' - self.assertEquals(s.count('.'), size + 1) - self.assertEquals(s.count(' '), 3) - self.assertEquals(s.count('i'), 1) - self.assertEquals(s.count('j'), 0) - - @bigmemtest(minsize=_2G + 2, memuse=3) - def test_decode(self, size): - s = b'.' 
* size - self.assertEquals(len(s.decode('utf-8')), size) - - def basic_encode_test(self, size, enc, c='.', expectedsize=None): - if expectedsize is None: - expectedsize = size - - s = c * size - self.assertEquals(len(s.encode(enc)), expectedsize) - - @bigmemtest(minsize=_2G + 2, memuse=3) - def test_encode(self, size): - return self.basic_encode_test(size, 'utf-8') - - @precisionbigmemtest(size=_4G / 6 + 2, memuse=2) - def test_encode_raw_unicode_escape(self, size): - try: - return self.basic_encode_test(size, 'raw_unicode_escape') - except MemoryError: - pass # acceptable on 32-bit - - @precisionbigmemtest(size=_4G / 5 + 70, memuse=3) - def test_encode_utf7(self, size): - try: - return self.basic_encode_test(size, 'utf7') - except MemoryError: - pass # acceptable on 32-bit - - @precisionbigmemtest(size=_4G / 4 + 5, memuse=6) - def test_encode_utf32(self, size): - try: - return self.basic_encode_test(size, 'utf32', expectedsize=4*size+4) - except MemoryError: - pass # acceptable on 32-bit - - @precisionbigmemtest(size=_2G-1, memuse=2) - def test_decodeascii(self, size): - return self.basic_encode_test(size, 'ascii', c='A') - - @precisionbigmemtest(size=_4G / 5, memuse=6+2) - def test_unicode_repr_oflw(self, size): - try: - s = "\uAAAA"*size - r = repr(s) - except MemoryError: - pass # acceptable on 32-bit - else: - self.failUnless(s == eval(r)) + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') + s = _('.') * size + SUBSTR + self.assertEquals(s.count(_('.')), size) + s += _('.') + self.assertEquals(s.count(_('.')), size + 1) + self.assertEquals(s.count(_(' ')), 3) + self.assertEquals(s.count(_('i')), 1) + self.assertEquals(s.count(_('j')), 0) @bigmemtest(minsize=_2G, memuse=2) def test_endswith(self, size): - SUBSTR = ' abc def ghi' - s = '-' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') + s = _('-') * size + SUBSTR self.failUnless(s.endswith(SUBSTR)) self.failUnless(s.endswith(s)) - s2 = '...' 
+ s + s2 = _('...') + s self.failUnless(s2.endswith(s)) - self.failIf(s.endswith('a' + SUBSTR)) + self.failIf(s.endswith(_('a') + SUBSTR)) self.failIf(SUBSTR.endswith(s)) @bigmemtest(minsize=_2G + 10, memuse=2) def test_expandtabs(self, size): - s = '-' * size + _ = self.from_latin1 + s = _('-') * size tabsize = 8 self.assertEquals(s.expandtabs(), s) del s slen, remainder = divmod(size, tabsize) - s = ' \t' * slen + s = _(' \t') * slen s = s.expandtabs(tabsize) self.assertEquals(len(s), size - remainder) - self.assertEquals(len(s.strip(' ')), 0) + self.assertEquals(len(s.strip(_(' '))), 0) @bigmemtest(minsize=_2G, memuse=2) def test_find(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') sublen = len(SUBSTR) - s = ''.join([SUBSTR, '-' * size, SUBSTR]) - self.assertEquals(s.find(' '), 0) + s = _('').join([SUBSTR, _('-') * size, SUBSTR]) + self.assertEquals(s.find(_(' ')), 0) self.assertEquals(s.find(SUBSTR), 0) - self.assertEquals(s.find(' ', sublen), sublen + size) + self.assertEquals(s.find(_(' '), sublen), sublen + size) self.assertEquals(s.find(SUBSTR, len(SUBSTR)), sublen + size) - self.assertEquals(s.find('i'), SUBSTR.find('i')) - self.assertEquals(s.find('i', sublen), - sublen + size + SUBSTR.find('i')) - self.assertEquals(s.find('i', size), - sublen + size + SUBSTR.find('i')) - self.assertEquals(s.find('j'), -1) + self.assertEquals(s.find(_('i')), SUBSTR.find(_('i'))) + self.assertEquals(s.find(_('i'), sublen), + sublen + size + SUBSTR.find(_('i'))) + self.assertEquals(s.find(_('i'), size), + sublen + size + SUBSTR.find(_('i'))) + self.assertEquals(s.find(_('j')), -1) @bigmemtest(minsize=_2G, memuse=2) def test_index(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') sublen = len(SUBSTR) - s = ''.join([SUBSTR, '-' * size, SUBSTR]) - self.assertEquals(s.index(' '), 0) + s = _('').join([SUBSTR, _('-') * size, SUBSTR]) + self.assertEquals(s.index(_(' ')), 0) 
self.assertEquals(s.index(SUBSTR), 0) - self.assertEquals(s.index(' ', sublen), sublen + size) + self.assertEquals(s.index(_(' '), sublen), sublen + size) self.assertEquals(s.index(SUBSTR, sublen), sublen + size) - self.assertEquals(s.index('i'), SUBSTR.index('i')) - self.assertEquals(s.index('i', sublen), - sublen + size + SUBSTR.index('i')) - self.assertEquals(s.index('i', size), - sublen + size + SUBSTR.index('i')) - self.assertRaises(ValueError, s.index, 'j') + self.assertEquals(s.index(_('i')), SUBSTR.index(_('i'))) + self.assertEquals(s.index(_('i'), sublen), + sublen + size + SUBSTR.index(_('i'))) + self.assertEquals(s.index(_('i'), size), + sublen + size + SUBSTR.index(_('i'))) + self.assertRaises(ValueError, s.index, _('j')) @bigmemtest(minsize=_2G, memuse=2) def test_isalnum(self, size): - SUBSTR = '123456' - s = 'a' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = _('123456') + s = _('a') * size + SUBSTR self.failUnless(s.isalnum()) - s += '.' + s += _('.') self.failIf(s.isalnum()) @bigmemtest(minsize=_2G, memuse=2) def test_isalpha(self, size): - SUBSTR = 'zzzzzzz' - s = 'a' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = _('zzzzzzz') + s = _('a') * size + SUBSTR self.failUnless(s.isalpha()) - s += '.' 
+ s += _('.') self.failIf(s.isalpha()) @bigmemtest(minsize=_2G, memuse=2) def test_isdigit(self, size): - SUBSTR = '123456' - s = '9' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = _('123456') + s = _('9') * size + SUBSTR self.failUnless(s.isdigit()) - s += 'z' + s += _('z') self.failIf(s.isdigit()) @bigmemtest(minsize=_2G, memuse=2) def test_islower(self, size): - chars = ''.join([ chr(c) for c in range(255) if not chr(c).isupper() ]) + _ = self.from_latin1 + chars = _(''.join( + chr(c) for c in range(255) if not chr(c).isupper())) repeats = size // len(chars) + 2 s = chars * repeats self.failUnless(s.islower()) - s += 'A' + s += _('A') self.failIf(s.islower()) @bigmemtest(minsize=_2G, memuse=2) def test_isspace(self, size): - whitespace = ' \f\n\r\t\v' + _ = self.from_latin1 + whitespace = _(' \f\n\r\t\v') repeats = size // len(whitespace) + 2 s = whitespace * repeats self.failUnless(s.isspace()) - s += 'j' + s += _('j') self.failIf(s.isspace()) @bigmemtest(minsize=_2G, memuse=2) def test_istitle(self, size): - SUBSTR = '123456' - s = ''.join(['A', 'a' * size, SUBSTR]) + _ = self.from_latin1 + SUBSTR = _('123456') + s = _('').join([_('A'), _('a') * size, SUBSTR]) self.failUnless(s.istitle()) - s += 'A' + s += _('A') self.failUnless(s.istitle()) - s += 'aA' + s += _('aA') self.failIf(s.istitle()) @bigmemtest(minsize=_2G, memuse=2) def test_isupper(self, size): - chars = ''.join([ chr(c) for c in range(255) if not chr(c).islower() ]) + _ = self.from_latin1 + chars = _(''.join( + chr(c) for c in range(255) if not chr(c).islower())) repeats = size // len(chars) + 2 s = chars * repeats self.failUnless(s.isupper()) - s += 'a' + s += _('a') self.failIf(s.isupper()) @bigmemtest(minsize=_2G, memuse=2) def test_join(self, size): - s = 'A' * size - x = s.join(['aaaaa', 'bbbbb']) - self.assertEquals(x.count('a'), 5) - self.assertEquals(x.count('b'), 5) - self.failUnless(x.startswith('aaaaaA')) - self.failUnless(x.endswith('Abbbbb')) + _ = self.from_latin1 + s = _('A') * 
size + x = s.join([_('aaaaa'), _('bbbbb')]) + self.assertEquals(x.count(_('a')), 5) + self.assertEquals(x.count(_('b')), 5) + self.failUnless(x.startswith(_('aaaaaA'))) + self.failUnless(x.endswith(_('Abbbbb'))) @bigmemtest(minsize=_2G + 10, memuse=1) def test_ljust(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') s = SUBSTR.ljust(size) - self.failUnless(s.startswith(SUBSTR + ' ')) + self.failUnless(s.startswith(SUBSTR + _(' '))) self.assertEquals(len(s), size) self.assertEquals(s.strip(), SUBSTR.strip()) @bigmemtest(minsize=_2G + 10, memuse=2) def test_lower(self, size): - s = 'A' * size + _ = self.from_latin1 + s = _('A') * size s = s.lower() self.assertEquals(len(s), size) - self.assertEquals(s.count('a'), size) + self.assertEquals(s.count(_('a')), size) @bigmemtest(minsize=_2G + 10, memuse=1) def test_lstrip(self, size): - SUBSTR = 'abc def ghi' + _ = self.from_latin1 + SUBSTR = _('abc def ghi') s = SUBSTR.rjust(size) self.assertEquals(len(s), size) self.assertEquals(s.lstrip(), SUBSTR.lstrip()) del s s = SUBSTR.ljust(size) self.assertEquals(len(s), size) - stripped = s.lstrip() - self.failUnless(stripped is s) + # Type-specific optimization + if isinstance(s, (str, bytes)): + stripped = s.lstrip() + self.failUnless(stripped is s) @bigmemtest(minsize=_2G + 10, memuse=2) def test_replace(self, size): - replacement = 'a' - s = ' ' * size - s = s.replace(' ', replacement) + _ = self.from_latin1 + replacement = _('a') + s = _(' ') * size + s = s.replace(_(' '), replacement) self.assertEquals(len(s), size) self.assertEquals(s.count(replacement), size) - s = s.replace(replacement, ' ', size - 4) + s = s.replace(replacement, _(' '), size - 4) self.assertEquals(len(s), size) self.assertEquals(s.count(replacement), 4) - self.assertEquals(s[-10:], ' aaaa') + self.assertEquals(s[-10:], _(' aaaa')) @bigmemtest(minsize=_2G, memuse=2) def test_rfind(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def 
ghi') sublen = len(SUBSTR) - s = ''.join([SUBSTR, '-' * size, SUBSTR]) - self.assertEquals(s.rfind(' '), sublen + size + SUBSTR.rfind(' ')) + s = _('').join([SUBSTR, _('-') * size, SUBSTR]) + self.assertEquals(s.rfind(_(' ')), sublen + size + SUBSTR.rfind(_(' '))) self.assertEquals(s.rfind(SUBSTR), sublen + size) - self.assertEquals(s.rfind(' ', 0, size), SUBSTR.rfind(' ')) + self.assertEquals(s.rfind(_(' '), 0, size), SUBSTR.rfind(_(' '))) self.assertEquals(s.rfind(SUBSTR, 0, sublen + size), 0) - self.assertEquals(s.rfind('i'), sublen + size + SUBSTR.rfind('i')) - self.assertEquals(s.rfind('i', 0, sublen), SUBSTR.rfind('i')) - self.assertEquals(s.rfind('i', 0, sublen + size), - SUBSTR.rfind('i')) - self.assertEquals(s.rfind('j'), -1) + self.assertEquals(s.rfind(_('i')), sublen + size + SUBSTR.rfind(_('i'))) + self.assertEquals(s.rfind(_('i'), 0, sublen), SUBSTR.rfind(_('i'))) + self.assertEquals(s.rfind(_('i'), 0, sublen + size), + SUBSTR.rfind(_('i'))) + self.assertEquals(s.rfind(_('j')), -1) @bigmemtest(minsize=_2G, memuse=2) def test_rindex(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') sublen = len(SUBSTR) - s = ''.join([SUBSTR, '-' * size, SUBSTR]) - self.assertEquals(s.rindex(' '), - sublen + size + SUBSTR.rindex(' ')) + s = _('').join([SUBSTR, _('-') * size, SUBSTR]) + self.assertEquals(s.rindex(_(' ')), + sublen + size + SUBSTR.rindex(_(' '))) self.assertEquals(s.rindex(SUBSTR), sublen + size) - self.assertEquals(s.rindex(' ', 0, sublen + size - 1), - SUBSTR.rindex(' ')) + self.assertEquals(s.rindex(_(' '), 0, sublen + size - 1), + SUBSTR.rindex(_(' '))) self.assertEquals(s.rindex(SUBSTR, 0, sublen + size), 0) - self.assertEquals(s.rindex('i'), - sublen + size + SUBSTR.rindex('i')) - self.assertEquals(s.rindex('i', 0, sublen), SUBSTR.rindex('i')) - self.assertEquals(s.rindex('i', 0, sublen + size), - SUBSTR.rindex('i')) - self.assertRaises(ValueError, s.rindex, 'j') + self.assertEquals(s.rindex(_('i')), + sublen 
+ size + SUBSTR.rindex(_('i'))) + self.assertEquals(s.rindex(_('i'), 0, sublen), SUBSTR.rindex(_('i'))) + self.assertEquals(s.rindex(_('i'), 0, sublen + size), + SUBSTR.rindex(_('i'))) + self.assertRaises(ValueError, s.rindex, _('j')) @bigmemtest(minsize=_2G + 10, memuse=1) def test_rjust(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') s = SUBSTR.ljust(size) - self.failUnless(s.startswith(SUBSTR + ' ')) + self.failUnless(s.startswith(SUBSTR + _(' '))) self.assertEquals(len(s), size) self.assertEquals(s.strip(), SUBSTR.strip()) @bigmemtest(minsize=_2G + 10, memuse=1) def test_rstrip(self, size): - SUBSTR = ' abc def ghi' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') s = SUBSTR.ljust(size) self.assertEquals(len(s), size) self.assertEquals(s.rstrip(), SUBSTR.rstrip()) del s s = SUBSTR.rjust(size) self.assertEquals(len(s), size) - stripped = s.rstrip() - self.failUnless(stripped is s) + # Type-specific optimization + if isinstance(s, (str, bytes)): + stripped = s.rstrip() + self.failUnless(stripped is s) # The test takes about size bytes to build a string, and then about # sqrt(size) substrings of sqrt(size) in size and a list to # hold sqrt(size) items. It's close but just over 2x size. 
@bigmemtest(minsize=_2G, memuse=2.1) def test_split_small(self, size): + _ = self.from_latin1 # Crudely calculate an estimate so that the result of s.split won't # take up an inordinate amount of memory chunksize = int(size ** 0.5 + 2) - SUBSTR = 'a' + ' ' * chunksize + SUBSTR = _('a') + _(' ') * chunksize s = SUBSTR * chunksize l = s.split() self.assertEquals(len(l), chunksize) - self.assertEquals(set(l), set(['a'])) + expected = _('a') + for item in l: + self.assertEquals(item, expected) del l - l = s.split('a') + l = s.split(_('a')) self.assertEquals(len(l), chunksize + 1) - self.assertEquals(set(l), set(['', ' ' * chunksize])) + expected = _(' ') * chunksize + for item in filter(None, l): + self.assertEquals(item, expected) # Allocates a string of twice size (and briefly two) and a list of # size. Because of internal affairs, the s.split() call produces a @@ -374,37 +345,43 @@ class StrTest(unittest.TestCase): # 8*size bytes. @bigmemtest(minsize=_2G + 5, memuse=10) def test_split_large(self, size): - s = ' a' * size + ' ' + _ = self.from_latin1 + s = _(' a') * size + _(' ') l = s.split() self.assertEquals(len(l), size) - self.assertEquals(set(l), set(['a'])) + self.assertEquals(set(l), set([_('a')])) del l - l = s.split('a') + l = s.split(_('a')) self.assertEquals(len(l), size + 1) - self.assertEquals(set(l), set([' '])) + self.assertEquals(set(l), set([_(' ')])) @bigmemtest(minsize=_2G, memuse=2.1) def test_splitlines(self, size): + _ = self.from_latin1 # Crudely calculate an estimate so that the result of s.split won't # take up an inordinate amount of memory chunksize = int(size ** 0.5 + 2) // 2 - SUBSTR = ' ' * chunksize + '\n' + ' ' * chunksize + '\r\n' + SUBSTR = _(' ') * chunksize + _('\n') + _(' ') * chunksize + _('\r\n') s = SUBSTR * chunksize l = s.splitlines() self.assertEquals(len(l), chunksize * 2) - self.assertEquals(set(l), set([' ' * chunksize])) + expected = _(' ') * chunksize + for item in l: + self.assertEquals(item, expected) 
@bigmemtest(minsize=_2G, memuse=2) def test_startswith(self, size): - SUBSTR = ' abc def ghi' - s = '-' * size + SUBSTR + _ = self.from_latin1 + SUBSTR = _(' abc def ghi') + s = _('-') * size + SUBSTR self.failUnless(s.startswith(s)) - self.failUnless(s.startswith('-' * size)) + self.failUnless(s.startswith(_('-') * size)) self.failIf(s.startswith(SUBSTR)) @bigmemtest(minsize=_2G, memuse=1) def test_strip(self, size): - SUBSTR = ' abc def ghi ' + _ = self.from_latin1 + SUBSTR = _(' abc def ghi ') s = SUBSTR.rjust(size) self.assertEquals(len(s), size) self.assertEquals(s.strip(), SUBSTR.strip()) @@ -415,7 +392,8 @@ class StrTest(unittest.TestCase): @bigmemtest(minsize=_2G, memuse=2) def test_swapcase(self, size): - SUBSTR = "aBcDeFG12.'\xa9\x00" + _ = self.from_latin1 + SUBSTR = _("aBcDeFG12.'\xa9\x00") sublen = len(SUBSTR) repeats = size // sublen + 2 s = SUBSTR * repeats @@ -426,7 +404,8 @@ class StrTest(unittest.TestCase): @bigmemtest(minsize=_2G, memuse=2) def test_title(self, size): - SUBSTR = 'SpaaHAaaAaham' + _ = self.from_latin1 + SUBSTR = _('SpaaHAaaAaham') s = SUBSTR * (size // len(SUBSTR) + 2) s = s.title() self.failUnless(s.startswith((SUBSTR * 3).title())) @@ -434,8 +413,19 @@ class StrTest(unittest.TestCase): @bigmemtest(minsize=_2G, memuse=2) def test_translate(self, size): - trans = {ord('.'):'-', ord('a'):'!', ord('Z'):'$'} - SUBSTR = 'aZz.z.Aaz.' 
+ _ = self.from_latin1 + trans = { + ord(_('.')): _('-'), + ord(_('a')): _('!'), + ord(_('Z')): _('$'), + } + SUBSTR = _('aZz.z.Aaz.') + if not isinstance(SUBSTR, str): + # Workaround the inexistence of bytes.maketrans() + chars = bytearray(range(256)) + for k, v in trans.items(): + chars[k] = ord(v) + trans = chars sublen = len(SUBSTR) repeats = size // sublen + 2 s = SUBSTR * repeats @@ -443,106 +433,54 @@ class StrTest(unittest.TestCase): self.assertEquals(len(s), repeats * sublen) self.assertEquals(s[:sublen], SUBSTR.translate(trans)) self.assertEquals(s[-sublen:], SUBSTR.translate(trans)) - self.assertEquals(s.count('.'), 0) - self.assertEquals(s.count('!'), repeats * 2) - self.assertEquals(s.count('z'), repeats * 3) + self.assertEquals(s.count(_('.')), 0) + self.assertEquals(s.count(_('!')), repeats * 2) + self.assertEquals(s.count(_('z')), repeats * 3) @bigmemtest(minsize=_2G + 5, memuse=2) def test_upper(self, size): - s = 'a' * size + _ = self.from_latin1 + s = _('a') * size s = s.upper() self.assertEquals(len(s), size) - self.assertEquals(s.count('A'), size) + self.assertEquals(s.count(_('A')), size) @bigmemtest(minsize=_2G + 20, memuse=1) def test_zfill(self, size): - SUBSTR = '-568324723598234' + _ = self.from_latin1 + SUBSTR = _('-568324723598234') s = SUBSTR.zfill(size) - self.failUnless(s.endswith('0' + SUBSTR[1:])) - self.failUnless(s.startswith('-0')) + self.failUnless(s.endswith(_('0') + SUBSTR[1:])) + self.failUnless(s.startswith(_('-0'))) self.assertEquals(len(s), size) - self.assertEquals(s.count('0'), size - len(SUBSTR)) - - @bigmemtest(minsize=_2G + 10, memuse=2) - def test_format(self, size): - s = '-' * size - sf = '%s' % (s,) - self.assertEqual(s, sf) - del sf - sf = '..%s..' % (s,) - self.assertEquals(len(sf), len(s) + 4) - self.failUnless(sf.startswith('..-')) - self.failUnless(sf.endswith('-..')) - del s, sf - - size //= 2 - edge = '-' * size - s = ''.join([edge, '%s', edge]) - del edge - s = s % '...' 
- self.assertEquals(len(s), size * 2 + 3) - self.assertEquals(s.count('.'), 3) - self.assertEquals(s.count('-'), size * 2) - - @bigmemtest(minsize=_2G + 10, memuse=2) - def test_repr_small(self, size): - s = '-' * size - s = repr(s) - self.assertEquals(len(s), size + 2) - self.assertEquals(s[0], "'") - self.assertEquals(s[-1], "'") - self.assertEquals(s.count('-'), size) - del s - # repr() will create a string four times as large as this 'binary - # string', but we don't want to allocate much more than twice - # size in total. (We do extra testing in test_repr_large()) - size = size // 5 * 2 - s = '\x00' * size - s = repr(s) - self.assertEquals(len(s), size * 4 + 2) - self.assertEquals(s[0], "'") - self.assertEquals(s[-1], "'") - self.assertEquals(s.count('\\'), size) - self.assertEquals(s.count('0'), size * 2) - - @bigmemtest(minsize=_2G + 10, memuse=5) - def test_repr_large(self, size): - s = '\x00' * size - s = repr(s) - self.assertEquals(len(s), size * 4 + 2) - self.assertEquals(s[0], "'") - self.assertEquals(s[-1], "'") - self.assertEquals(s.count('\\'), size) - self.assertEquals(s.count('0'), size * 2) - - @bigmemtest(minsize=2**32 / 5, memuse=6+2) - def test_unicode_repr(self, size): - s = "\uAAAA" * size - self.failUnless(len(repr(s)) > size) + self.assertEquals(s.count(_('0')), size - len(SUBSTR)) # This test is meaningful even with size < 2G, as long as the # doubled string is > 2G (but it tests more if both are > 2G :) @bigmemtest(minsize=_1G + 2, memuse=3) def test_concat(self, size): - s = '.' * size + _ = self.from_latin1 + s = _('.') * size self.assertEquals(len(s), size) s = s + s self.assertEquals(len(s), size * 2) - self.assertEquals(s.count('.'), size * 2) + self.assertEquals(s.count(_('.')), size * 2) # This test is meaningful even with size < 2G, as long as the # repeated string is > 2G (but it tests more if both are > 2G :) @bigmemtest(minsize=_1G + 2, memuse=3) def test_repeat(self, size): - s = '.' 
* size + _ = self.from_latin1 + s = _('.') * size self.assertEquals(len(s), size) s = s * 2 self.assertEquals(len(s), size * 2) - self.assertEquals(s.count('.'), size * 2) + self.assertEquals(s.count(_('.')), size * 2) - @bigmemtest(minsize=_2G + 20, memuse=1) + @bigmemtest(minsize=_2G + 20, memuse=2) def test_slice_and_getitem(self, size): - SUBSTR = '0123456789' + _ = self.from_latin1 + SUBSTR = _('0123456789') sublen = len(SUBSTR) s = SUBSTR * (size // sublen) stepsize = len(s) // 100 @@ -560,9 +498,9 @@ class StrTest(unittest.TestCase): self.assertEquals(s[-1], SUBSTR[-1]) self.assertEquals(s[len(s) - 10], SUBSTR[0]) self.assertEquals(s[-sublen], SUBSTR[0]) - self.assertEquals(s[len(s):], '') - self.assertEquals(s[len(s) - 1:], SUBSTR[-1]) - self.assertEquals(s[-1:], SUBSTR[-1]) + self.assertEquals(s[len(s):], _('')) + self.assertEquals(s[len(s) - 1:], SUBSTR[-1:]) + self.assertEquals(s[-1:], SUBSTR[-1:]) self.assertEquals(s[len(s) - sublen:], SUBSTR) self.assertEquals(s[-sublen:], SUBSTR) self.assertEquals(len(s[:]), len(s)) @@ -575,27 +513,29 @@ class StrTest(unittest.TestCase): @bigmemtest(minsize=_2G, memuse=2) def test_contains(self, size): - SUBSTR = '0123456789' - edge = '-' * (size // 2) - s = ''.join([edge, SUBSTR, edge]) + _ = self.from_latin1 + SUBSTR = _('0123456789') + edge = _('-') * (size // 2) + s = _('').join([edge, SUBSTR, edge]) del edge self.failUnless(SUBSTR in s) self.failIf(SUBSTR * 2 in s) - self.failUnless('-' in s) - self.failIf('a' in s) - s += 'a' - self.failUnless('a' in s) + self.failUnless(_('-') in s) + self.failIf(_('a') in s) + s += _('a') + self.failUnless(_('a') in s) @bigmemtest(minsize=_2G + 10, memuse=2) def test_compare(self, size): - s1 = '-' * size - s2 = '-' * size + _ = self.from_latin1 + s1 = _('-') * size + s2 = _('-') * size self.assertEqual(s1, s2) del s2 - s2 = s1 + 'a' + s2 = s1 + _('a') self.failIf(s1 == s2) del s2 - s2 = '.' 
* size + s2 = _('.') * size self.failIf(s1 == s2) @bigmemtest(minsize=_2G + 10, memuse=1) @@ -606,12 +546,181 @@ class StrTest(unittest.TestCase): # test is dodgy (there's no *guarantee* that the two things should # have a different hash, even if they, in the current # implementation, almost always do.) - s = '\x00' * size + _ = self.from_latin1 + s = _('\x00') * size h1 = hash(s) del s - s = '\x00' * (size + 1) + s = _('\x00') * (size + 1) self.failIf(h1 == hash(s)) + +class StrTest(unittest.TestCase, BaseStrTest): + + def from_latin1(self, s): + return s + + def basic_encode_test(self, size, enc, c='.', expectedsize=None): + if expectedsize is None: + expectedsize = size + + s = c * size + self.assertEquals(len(s.encode(enc)), expectedsize) + + def setUp(self): + # HACK: adjust memory use of tests inherited from BaseStrTest + # according to character size. + self._adjusted = {} + for name in dir(BaseStrTest): + if not name.startswith('test_'): + continue + meth = getattr(type(self), name) + try: + memuse = meth.memuse + except AttributeError: + continue + meth.memuse = character_size * memuse + self._adjusted[name] = memuse + + def tearDown(self): + for name, memuse in self._adjusted.items(): + getattr(type(self), name).memuse = memuse + + @bigmemtest(minsize=_2G + 2, memuse=character_size + 1) + def test_encode(self, size): + return self.basic_encode_test(size, 'utf-8') + + @precisionbigmemtest(size=_4G // 6 + 2, memuse=character_size + 1) + def test_encode_raw_unicode_escape(self, size): + try: + return self.basic_encode_test(size, 'raw_unicode_escape') + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_4G // 5 + 70, memuse=character_size + 1) + def test_encode_utf7(self, size): + try: + return self.basic_encode_test(size, 'utf7') + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_4G // 4 + 5, memuse=character_size + 4) + def test_encode_utf32(self, size): + try: + return 
self.basic_encode_test(size, 'utf32', expectedsize=4*size+4) + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_2G - 1, memuse=character_size + 1) + def test_encode_ascii(self, size): + return self.basic_encode_test(size, 'ascii', c='A') + + @precisionbigmemtest(size=_4G // 5, memuse=character_size * (6 + 1)) + def test_unicode_repr_overflow(self, size): + try: + s = "\uAAAA"*size + r = repr(s) + except MemoryError: + pass # acceptable on 32-bit + else: + self.failUnless(s == eval(r)) + + @bigmemtest(minsize=_2G + 10, memuse=character_size * 2) + def test_format(self, size): + s = '-' * size + sf = '%s' % (s,) + self.assertEqual(s, sf) + del sf + sf = '..%s..' % (s,) + self.assertEquals(len(sf), len(s) + 4) + self.failUnless(sf.startswith('..-')) + self.failUnless(sf.endswith('-..')) + del s, sf + + size //= 2 + edge = '-' * size + s = ''.join([edge, '%s', edge]) + del edge + s = s % '...' + self.assertEquals(len(s), size * 2 + 3) + self.assertEquals(s.count('.'), 3) + self.assertEquals(s.count('-'), size * 2) + + @bigmemtest(minsize=_2G + 10, memuse=character_size * 2) + def test_repr_small(self, size): + s = '-' * size + s = repr(s) + self.assertEquals(len(s), size + 2) + self.assertEquals(s[0], "'") + self.assertEquals(s[-1], "'") + self.assertEquals(s.count('-'), size) + del s + # repr() will create a string four times as large as this 'binary + # string', but we don't want to allocate much more than twice + # size in total. 
(We do extra testing in test_repr_large()) + size = size // 5 * 2 + s = '\x00' * size + s = repr(s) + self.assertEquals(len(s), size * 4 + 2) + self.assertEquals(s[0], "'") + self.assertEquals(s[-1], "'") + self.assertEquals(s.count('\\'), size) + self.assertEquals(s.count('0'), size * 2) + + @bigmemtest(minsize=_2G + 10, memuse=character_size * 5) + def test_repr_large(self, size): + s = '\x00' * size + s = repr(s) + self.assertEquals(len(s), size * 4 + 2) + self.assertEquals(s[0], "'") + self.assertEquals(s[-1], "'") + self.assertEquals(s.count('\\'), size) + self.assertEquals(s.count('0'), size * 2) + + @bigmemtest(minsize=2**32 / 5, memuse=character_size * 7) + def test_unicode_repr(self, size): + s = "\uAAAA" * size + for f in (repr, ascii): + r = f(s) + self.failUnless(len(r) > size) + self.failUnless(r.endswith(r"\uaaaa'"), r[-10:]) + del r + + # The character takes 4 bytes even in UCS-2 builds because it will + # be decomposed into surrogates. + @bigmemtest(minsize=2**32 / 5, memuse=4 + character_size * 9) + def test_unicode_repr_wide(self, size): + s = "\U0001AAAA" * size + for f in (repr, ascii): + r = f(s) + self.failUnless(len(r) > size) + self.failUnless(r.endswith(r"\U0001aaaa'"), r[-12:]) + del r + + +class BytesTest(unittest.TestCase, BaseStrTest): + + def from_latin1(self, s): + return s.encode("latin1") + + @bigmemtest(minsize=_2G + 2, memuse=1 + character_size) + def test_decode(self, size): + s = self.from_latin1('.') * size + self.assertEquals(len(s.decode('utf-8')), size) + + +class BytearrayTest(unittest.TestCase, BaseStrTest): + + def from_latin1(self, s): + return bytearray(s.encode("latin1")) + + @bigmemtest(minsize=_2G + 2, memuse=1 + character_size) + def test_decode(self, size): + s = self.from_latin1('.') * size + self.assertEquals(len(s.decode('utf-8')), size) + + test_hash = None + test_split_large = None + class TupleTest(unittest.TestCase): # Tuples have a small, fixed-sized head and an array of pointers to @@ -1048,7 +1157,8 @@ 
class ListTest(unittest.TestCase): self.assertEquals(l[-10:], [5] * 10) def test_main(): - support.run_unittest(StrTest, TupleTest, ListTest) + support.run_unittest(StrTest, BytesTest, BytearrayTest, + TupleTest, ListTest) if __name__ == '__main__': if len(sys.argv) > 1: |