summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r-- Lib/test/test_unicode.py 525
1 files changed, 447 insertions, 78 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index ef0fd1c..f046938 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -7,6 +7,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import _string
import codecs
+import itertools
import struct
import sys
import unittest
@@ -31,6 +32,19 @@ def search_function(encoding):
return None
codecs.register(search_function)
+def duplicate_string(text):
+ """
+ Try to get a fresh clone of the specified text (a new object with a
+ reference count of 1) by round-tripping it through encode()/decode().
+
+ This is best-effort only: latin1 single letters and the empty
+ string ('') are singletons and cannot be cloned.
+ """
+ return text.encode().decode()
+
+class StrSubclass(str):
+ pass
+
class UnicodeTest(string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest,
string_tests.MixinStrUnicodeTest,
@@ -169,6 +183,19 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequalnofix(3, 'aaa', 'count', 'a', -10)
self.checkequalnofix(2, 'aaa', 'count', 'a', 0, -1)
self.checkequalnofix(0, 'aaa', 'count', 'a', 0, -10)
+ # test mixed kinds
+ self.checkequal(10, '\u0102' + 'a' * 10, 'count', 'a')
+ self.checkequal(10, '\U00100304' + 'a' * 10, 'count', 'a')
+ self.checkequal(10, '\U00100304' + '\u0102' * 10, 'count', '\u0102')
+ self.checkequal(0, 'a' * 10, 'count', '\u0102')
+ self.checkequal(0, 'a' * 10, 'count', '\U00100304')
+ self.checkequal(0, '\u0102' * 10, 'count', '\U00100304')
+ self.checkequal(10, '\u0102' + 'a_' * 10, 'count', 'a_')
+ self.checkequal(10, '\U00100304' + 'a_' * 10, 'count', 'a_')
+ self.checkequal(10, '\U00100304' + '\u0102_' * 10, 'count', '\u0102_')
+ self.checkequal(0, 'a' * 10, 'count', 'a\u0102')
+ self.checkequal(0, 'a' * 10, 'count', 'a\U00100304')
+ self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304')
def test_find(self):
string_tests.CommonTest.test_find(self)
@@ -187,6 +214,19 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(TypeError, 'hello'.find)
self.assertRaises(TypeError, 'hello'.find, 42)
+ # test mixed kinds
+ self.checkequal(100, '\u0102' * 100 + 'a', 'find', 'a')
+ self.checkequal(100, '\U00100304' * 100 + 'a', 'find', 'a')
+ self.checkequal(100, '\U00100304' * 100 + '\u0102', 'find', '\u0102')
+ self.checkequal(-1, 'a' * 100, 'find', '\u0102')
+ self.checkequal(-1, 'a' * 100, 'find', '\U00100304')
+ self.checkequal(-1, '\u0102' * 100, 'find', '\U00100304')
+ self.checkequal(100, '\u0102' * 100 + 'a_', 'find', 'a_')
+ self.checkequal(100, '\U00100304' * 100 + 'a_', 'find', 'a_')
+ self.checkequal(100, '\U00100304' * 100 + '\u0102_', 'find', '\u0102_')
+ self.checkequal(-1, 'a' * 100, 'find', 'a\u0102')
+ self.checkequal(-1, 'a' * 100, 'find', 'a\U00100304')
+ self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304')
def test_rfind(self):
string_tests.CommonTest.test_rfind(self)
@@ -202,6 +242,19 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
+ # test mixed kinds
+ self.checkequal(0, 'a' + '\u0102' * 100, 'rfind', 'a')
+ self.checkequal(0, 'a' + '\U00100304' * 100, 'rfind', 'a')
+ self.checkequal(0, '\u0102' + '\U00100304' * 100, 'rfind', '\u0102')
+ self.checkequal(-1, 'a' * 100, 'rfind', '\u0102')
+ self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304')
+ self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304')
+ self.checkequal(0, '_a' + '\u0102' * 100, 'rfind', '_a')
+ self.checkequal(0, '_a' + '\U00100304' * 100, 'rfind', '_a')
+ self.checkequal(0, '_\u0102' + '\U00100304' * 100, 'rfind', '_\u0102')
+ self.checkequal(-1, 'a' * 100, 'rfind', '\u0102a')
+ self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304a')
+ self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102')
def test_index(self):
string_tests.CommonTest.test_index(self)
@@ -213,6 +266,19 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(ValueError, 'abcdefghiab'.index, 'abc', 1)
self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', 8)
self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', -1)
+ # test mixed kinds
+ self.checkequal(100, '\u0102' * 100 + 'a', 'index', 'a')
+ self.checkequal(100, '\U00100304' * 100 + 'a', 'index', 'a')
+ self.checkequal(100, '\U00100304' * 100 + '\u0102', 'index', '\u0102')
+ self.assertRaises(ValueError, ('a' * 100).index, '\u0102')
+ self.assertRaises(ValueError, ('a' * 100).index, '\U00100304')
+ self.assertRaises(ValueError, ('\u0102' * 100).index, '\U00100304')
+ self.checkequal(100, '\u0102' * 100 + 'a_', 'index', 'a_')
+ self.checkequal(100, '\U00100304' * 100 + 'a_', 'index', 'a_')
+ self.checkequal(100, '\U00100304' * 100 + '\u0102_', 'index', '\u0102_')
+ self.assertRaises(ValueError, ('a' * 100).index, 'a\u0102')
+ self.assertRaises(ValueError, ('a' * 100).index, 'a\U00100304')
+ self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304')
def test_rindex(self):
string_tests.CommonTest.test_rindex(self)
@@ -226,6 +292,19 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(ValueError, 'defghiabc'.rindex, 'abc', 0, -1)
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
+ # test mixed kinds
+ self.checkequal(0, 'a' + '\u0102' * 100, 'rindex', 'a')
+ self.checkequal(0, 'a' + '\U00100304' * 100, 'rindex', 'a')
+ self.checkequal(0, '\u0102' + '\U00100304' * 100, 'rindex', '\u0102')
+ self.assertRaises(ValueError, ('a' * 100).rindex, '\u0102')
+ self.assertRaises(ValueError, ('a' * 100).rindex, '\U00100304')
+ self.assertRaises(ValueError, ('\u0102' * 100).rindex, '\U00100304')
+ self.checkequal(0, '_a' + '\u0102' * 100, 'rindex', '_a')
+ self.checkequal(0, '_a' + '\U00100304' * 100, 'rindex', '_a')
+ self.checkequal(0, '_\u0102' + '\U00100304' * 100, 'rindex', '_\u0102')
+ self.assertRaises(ValueError, ('a' * 100).rindex, '\u0102a')
+ self.assertRaises(ValueError, ('a' * 100).rindex, '\U00100304a')
+ self.assertRaises(ValueError, ('\u0102' * 100).rindex, '\U00100304\u0102')
def test_maketrans_translate(self):
# these work with plain translate()
@@ -266,6 +345,69 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
+ # test mixed kinds
+ for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
+ left *= 9
+ right *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ self.checkequal([left + right],
+ left + right, 'split', delim)
+ self.checkequal([left, right],
+ left + delim + right, 'split', delim)
+ self.checkequal([left + right],
+ left + right, 'split', delim * 2)
+ self.checkequal([left, right],
+ left + delim * 2 + right, 'split', delim *2)
+
+ def test_rsplit(self):
+ string_tests.CommonTest.test_rsplit(self)
+ # test mixed kinds: ASCII, BMP and astral text against ASCII, BMP and astral delimiters (1 and 2 characters long)
+ for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
+ left *= 9
+ right *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ self.checkequal([left + right],
+ left + right, 'rsplit', delim)
+ self.checkequal([left, right],
+ left + delim + right, 'rsplit', delim)
+ self.checkequal([left + right],
+ left + right, 'rsplit', delim * 2)
+ self.checkequal([left, right],
+ left + delim * 2 + right, 'rsplit', delim *2)
+
+ def test_partition(self):
+ string_tests.MixinStrUnicodeUserStringTest.test_partition(self)
+ # test mixed kinds: ASCII, BMP and astral text against ASCII, BMP and astral separators (1 and 2 characters long)
+ self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200')
+ for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
+ left *= 9
+ right *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ self.checkequal((left + right, '', ''),
+ left + right, 'partition', delim)
+ self.checkequal((left, delim, right),
+ left + delim + right, 'partition', delim)
+ self.checkequal((left + right, '', ''),
+ left + right, 'partition', delim * 2)
+ self.checkequal((left, delim * 2, right),
+ left + delim * 2 + right, 'partition', delim * 2)
+
+ def test_rpartition(self):
+ string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self)
+ # test mixed kinds: ASCII, BMP and astral text against ASCII, BMP and astral separators (1 and 2 characters long)
+ self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200')
+ for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
+ left *= 9
+ right *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ self.checkequal(('', '', left + right),
+ left + right, 'rpartition', delim)
+ self.checkequal((left, delim, right),
+ left + delim + right, 'rpartition', delim)
+ self.checkequal(('', '', left + right),
+ left + right, 'rpartition', delim * 2)
+ self.checkequal((left, delim * 2, right),
+ left + delim * 2 + right, 'rpartition', delim * 2)
def test_join(self):
string_tests.MixinStrUnicodeUserStringTest.test_join(self)
@@ -293,6 +435,22 @@ class UnicodeTest(string_tests.CommonTest,
# method call forwarded from str implementation because of unicode argument
self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
self.assertRaises(TypeError, 'replace'.replace, "r", 42)
+ # test mixed kinds
+ for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
+ left *= 9
+ right *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ for repl in ('d', '\u0103', '\U00010303'):
+ self.checkequal(left + right,
+ left + right, 'replace', delim, repl)
+ self.checkequal(left + repl + right,
+ left + delim + right,
+ 'replace', delim, repl)
+ self.checkequal(left + right,
+ left + right, 'replace', delim * 2, repl)
+ self.checkequal(left + repl + right,
+ left + delim * 2 + right,
+ 'replace', delim * 2, repl)
@support.cpython_only
def test_replace_id(self):
@@ -665,7 +823,15 @@ class UnicodeTest(string_tests.CommonTest,
@support.cpython_only
def test_case_operation_overflow(self):
# Issue #22643
- self.assertRaises(OverflowError, ("ü"*(2**32//12 + 1)).upper)
+ size = 2**32//12 + 1
+ try:
+ s = "ü" * size
+ except MemoryError:
+ self.skipTest('not enough memory (%.0f MiB required)' % (size / 2**20))
+ try:
+ self.assertRaises(OverflowError, s.upper)
+ finally:
+ del s
def test_contains(self):
# Testing Unicode contains method
@@ -697,6 +863,14 @@ class UnicodeTest(string_tests.CommonTest,
self.assertNotIn('asdf', '')
self.assertRaises(TypeError, "abc".__contains__)
+ # test mixed kinds
+ for fill in ('a', '\u0100', '\U00010300'):
+ fill *= 9
+ for delim in ('c', '\u0102', '\U00010302'):
+ self.assertNotIn(delim, fill)
+ self.assertIn(delim, fill + delim)
+ self.assertNotIn(delim * 2, fill)
+ self.assertIn(delim * 2, fill + delim * 2)
def test_issue18183(self):
'\U00010000\U00100000'.lower()
@@ -840,6 +1014,27 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
+ # issue 12546: use \x00 as a fill character
+ self.assertEqual('{0:\x00<6s}'.format('foo'), 'foo\x00\x00\x00')
+ self.assertEqual('{0:\x01<6s}'.format('foo'), 'foo\x01\x01\x01')
+ self.assertEqual('{0:\x00^6s}'.format('foo'), '\x00foo\x00\x00')
+ self.assertEqual('{0:^6s}'.format('foo'), ' foo ')
+
+ self.assertEqual('{0:\x00<6}'.format(3), '3\x00\x00\x00\x00\x00')
+ self.assertEqual('{0:\x01<6}'.format(3), '3\x01\x01\x01\x01\x01')
+ self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00')
+ self.assertEqual('{0:<6}'.format(3), '3 ')
+
+ self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00')
+ self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01')
+ self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00')
+ self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ')
+
+ self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00')
+ self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01')
+ self.assertEqual('{0:\x00^12}'.format(3+2.0j), '\x00\x00\x00(3+2j)\x00\x00\x00')
+ self.assertEqual('{0:^12}'.format(3+2.0j), ' (3+2j) ')
+
# format specifiers for user defined type
self.assertEqual('{0:abc}'.format(C()), 'abc')
@@ -869,11 +1064,9 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
self.assertEqual('{0!s}'.format(G('data')), 'string is data')
- msg = 'object.__format__ with a non-empty format string is deprecated'
- with support.check_warnings((msg, DeprecationWarning)):
- self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ')
- self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ')
- self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
+ self.assertRaises(TypeError, '{0:^10}'.format, E('data'))
+ self.assertRaises(TypeError, '{0:^10s}'.format, E('data'))
+ self.assertRaises(TypeError, '{0:>15s}'.format, G('data'))
self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
month=8,
@@ -909,7 +1102,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(ValueError, "{0".format)
self.assertRaises(IndexError, "{0.}".format)
self.assertRaises(ValueError, "{0.}".format, 0)
- self.assertRaises(IndexError, "{0[}".format)
+ self.assertRaises(ValueError, "{0[}".format)
self.assertRaises(ValueError, "{0[}".format, [])
self.assertRaises(KeyError, "{0]}".format)
self.assertRaises(ValueError, "{0.[]}".format, 0)
@@ -961,6 +1154,15 @@ class UnicodeTest(string_tests.CommonTest,
'')
self.assertEqual("{[{}]}".format({"{}": 5}), "5")
+ self.assertEqual("{[{}]}".format({"{}" : "a"}), "a")
+ self.assertEqual("{[{]}".format({"{" : "a"}), "a")
+ self.assertEqual("{[}]}".format({"}" : "a"}), "a")
+ self.assertEqual("{[[]}".format({"[" : "a"}), "a")
+ self.assertEqual("{[!]}".format({"!" : "a"}), "a")
+ self.assertRaises(ValueError, "{a{}b}".format, 42)
+ self.assertRaises(ValueError, "{a{b}".format, 42)
+ self.assertRaises(ValueError, "{[}".format, 42)
+
self.assertEqual("0x{:0{:d}X}".format(0x0,16), "0x0000000000000000")
def test_format_map(self):
@@ -1114,6 +1316,67 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('%.1s' % "a\xe9\u20ac", 'a')
self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9')
+ # issue 19995: %x, %X, %o and %c with int-like and float-like objects
+ class PsuedoInt:
+ def __init__(self, value):
+ self.value = int(value)
+ def __int__(self):
+ return self.value
+ def __index__(self):
+ return self.value
+ class PsuedoFloat:
+ def __init__(self, value):
+ self.value = float(value)
+ def __int__(self):
+ return int(self.value)
+ pi = PsuedoFloat(3.1415)
+ letter_m = PsuedoInt(109)
+ self.assertEqual('%x' % 42, '2a')
+ self.assertEqual('%X' % 15, 'F')
+ self.assertEqual('%o' % 9, '11')
+ self.assertEqual('%c' % 109, 'm')
+ self.assertEqual('%x' % letter_m, '6d')
+ self.assertEqual('%X' % letter_m, '6D')
+ self.assertEqual('%o' % letter_m, '155')
+ self.assertEqual('%c' % letter_m, 'm')
+ self.assertWarns(DeprecationWarning, '%x'.__mod__, pi),
+ self.assertWarns(DeprecationWarning, '%x'.__mod__, 3.14),
+ self.assertWarns(DeprecationWarning, '%X'.__mod__, 2.11),
+ self.assertWarns(DeprecationWarning, '%o'.__mod__, 1.79),
+ self.assertWarns(DeprecationWarning, '%c'.__mod__, pi),
+
+ def test_formatting_with_enum(self):
+ # Issue #18780: %-formatting of Enum members that mix in float, int or str
+ import enum
+ class Float(float, enum.Enum):
+ PI = 3.1415926
+ class Int(enum.IntEnum):
+ IDES = 15
+ class Str(str, enum.Enum):
+ ABC = 'abc'
+ # Positional arguments: %s is expected to give the member name, numeric codes the value
+ self.assertEqual("%s, %s" % (Str.ABC, Str.ABC),
+ 'Str.ABC, Str.ABC')
+ self.assertEqual("%s, %s, %d, %i, %u, %f, %5.2f" %
+ (Str.ABC, Str.ABC,
+ Int.IDES, Int.IDES, Int.IDES,
+ Float.PI, Float.PI),
+ 'Str.ABC, Str.ABC, 15, 15, 15, 3.141593, 3.14')
+
+ # formatting jobs delegated from the string implementation:
+ self.assertEqual('...%(foo)s...' % {'foo':Str.ABC},
+ '...Str.ABC...')
+ self.assertEqual('...%(foo)s...' % {'foo':Int.IDES},
+ '...Int.IDES...')
+ self.assertEqual('...%(foo)i...' % {'foo':Int.IDES},
+ '...15...')
+ self.assertEqual('...%(foo)d...' % {'foo':Int.IDES},
+ '...15...')
+ self.assertEqual('...%(foo)u...' % {'foo':Int.IDES, 'def':Float.PI},
+ '...15...')
+ self.assertEqual('...%(foo)f...' % {'foo':Float.PI,'def':123},
+ '...3.141593...')
+
def test_formatting_huge_precision(self):
format_string = "%.{}f".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
@@ -1152,11 +1415,8 @@ class UnicodeTest(string_tests.CommonTest,
'unicode remains unicode'
)
- class UnicodeSubclass(str):
- pass
-
for text in ('ascii', '\xe9', '\u20ac', '\U0010FFFF'):
- subclass = UnicodeSubclass(text)
+ subclass = StrSubclass(text)
self.assertEqual(str(subclass), text)
self.assertEqual(len(subclass), len(text))
if text == 'ascii':
@@ -1272,7 +1532,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
# Issue #2242: crash on some Windows/MSVC versions
- self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
+ self.assertEqual(b'+\xc1'.decode('utf-7', 'ignore'), '')
# Direct encoded characters
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
@@ -1341,9 +1601,9 @@ class UnicodeTest(string_tests.CommonTest,
def test_utf8_decode_invalid_sequences(self):
# continuation bytes in a sequence of 2, 3, or 4 bytes
continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]
- # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F
+ # start bytes of a 2-byte sequence equivalent to code points < 0x7F
invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]
- # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
+ # start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF
invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]
invalid_start_bytes = (
continuation_bytes + invalid_2B_seq_start_bytes +
@@ -1714,6 +1974,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict')
self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x")
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
+ self.assertEqual(str(b'\202 x', 'ascii', 'replace'), '\uFFFD x')
# Error handling (unknown character names)
self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
@@ -1794,10 +2055,10 @@ class UnicodeTest(string_tests.CommonTest,
# 0-127
s = bytes(range(128))
for encoding in (
- 'cp037', 'cp1026',
+ 'cp037', 'cp1026', 'cp273',
'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
- 'cp863', 'cp865', 'cp866',
+ 'cp863', 'cp865', 'cp866', 'cp1125',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
@@ -1822,10 +2083,10 @@ class UnicodeTest(string_tests.CommonTest,
# 128-255
s = bytes(range(128, 256))
for encoding in (
- 'cp037', 'cp1026',
+ 'cp037', 'cp1026', 'cp273',
'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
- 'cp863', 'cp865', 'cp866',
+ 'cp863', 'cp865', 'cp866', 'cp1125',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_4', 'iso8859_5',
'iso8859_9', 'koi8_r', 'latin_1',
@@ -1888,64 +2149,29 @@ class UnicodeTest(string_tests.CommonTest,
self.fail("Should have raised UnicodeDecodeError")
def test_conversion(self):
- # Make sure __unicode__() works properly
- class Foo0:
+ # Make sure str() honours __str__() on plain objects and on str subclasses
+ class ObjectToStr:
def __str__(self):
return "foo"
- class Foo1:
+ class StrSubclassToStr(str):
def __str__(self):
return "foo"
- class Foo2(object):
- def __str__(self):
- return "foo"
-
- class Foo3(object):
- def __str__(self):
- return "foo"
-
- class Foo4(str):
- def __str__(self):
- return "foo"
-
- class Foo5(str):
- def __str__(self):
- return "foo"
-
- class Foo6(str):
- def __str__(self):
- return "foos"
-
- def __str__(self):
- return "foou"
-
- class Foo7(str):
- def __str__(self):
- return "foos"
- def __str__(self):
- return "foou"
-
- class Foo8(str):
+ class StrSubclassToStrSubclass(str):
def __new__(cls, content=""):
return str.__new__(cls, 2*content)
def __str__(self):
return self
- class Foo9(str):
- def __str__(self):
- return "not unicode"
-
- self.assertEqual(str(Foo0()), "foo")
- self.assertEqual(str(Foo1()), "foo")
- self.assertEqual(str(Foo2()), "foo")
- self.assertEqual(str(Foo3()), "foo")
- self.assertEqual(str(Foo4("bar")), "foo")
- self.assertEqual(str(Foo5("bar")), "foo")
- self.assertEqual(str(Foo6("bar")), "foou")
- self.assertEqual(str(Foo7("bar")), "foou")
- self.assertEqual(str(Foo8("foo")), "foofoo")
- self.assertEqual(str(Foo9("foo")), "not unicode")
+ self.assertEqual(str(ObjectToStr()), "foo")
+ self.assertEqual(str(StrSubclassToStr("bar")), "foo")
+ s = str(StrSubclassToStrSubclass("foo"))
+ self.assertEqual(s, "foofoo")
+ self.assertIs(type(s), StrSubclassToStrSubclass)
+ s = StrSubclass(StrSubclassToStrSubclass("foo"))
+ self.assertEqual(s, "foofoo")
+ self.assertIs(type(s), StrSubclass)
def test_unicode_repr(self):
class s1:
@@ -2068,13 +2294,82 @@ class UnicodeTest(string_tests.CommonTest,
check_format('%abc',
b'%%%s', b'abc')
- # test %S
- check_format("repr=\u20acABC",
- b'repr=%S', '\u20acABC')
-
- # test %R
- check_format("repr='\u20acABC'",
- b'repr=%R', '\u20acABC')
+ # truncated string
+ check_format('abc',
+ b'%.3s', b'abcdef')
+ check_format('abc[\ufffd',
+ b'%.5s', 'abc[\u20ac]'.encode('utf8'))
+ check_format("'\\u20acABC'",
+ b'%A', '\u20acABC')
+ check_format("'\\u20",
+ b'%.5A', '\u20acABCDEF')
+ check_format("'\u20acABC'",
+ b'%R', '\u20acABC')
+ check_format("'\u20acA",
+ b'%.3R', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3S', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3U', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3V', '\u20acABCDEF', None)
+ check_format('abc[\ufffd',
+ b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
+
+ # the following tests come from issue #7330
+ # test width modifier and precision modifier with %S
+ check_format("repr= abc",
+ b'repr=%5S', 'abc')
+ check_format("repr=ab",
+ b'repr=%.2S', 'abc')
+ check_format("repr= ab",
+ b'repr=%5.2S', 'abc')
+
+ # test width modifier and precision modifier with %R
+ check_format("repr= 'abc'",
+ b'repr=%8R', 'abc')
+ check_format("repr='ab",
+ b'repr=%.3R', 'abc')
+ check_format("repr= 'ab",
+ b'repr=%5.3R', 'abc')
+
+ # test width modifier and precision modifier with %A
+ check_format("repr= 'abc'",
+ b'repr=%8A', 'abc')
+ check_format("repr='ab",
+ b'repr=%.3A', 'abc')
+ check_format("repr= 'ab",
+ b'repr=%5.3A', 'abc')
+
+ # test width modifier and precision modifier with %s
+ check_format("repr= abc",
+ b'repr=%5s', b'abc')
+ check_format("repr=ab",
+ b'repr=%.2s', b'abc')
+ check_format("repr= ab",
+ b'repr=%5.2s', b'abc')
+
+ # test width modifier and precision modifier with %U
+ check_format("repr= abc",
+ b'repr=%5U', 'abc')
+ check_format("repr=ab",
+ b'repr=%.2U', 'abc')
+ check_format("repr= ab",
+ b'repr=%5.2U', 'abc')
+
+ # test width modifier and precision modifier with %V
+ check_format("repr= abc",
+ b'repr=%5V', 'abc', b'123')
+ check_format("repr=ab",
+ b'repr=%.2V', 'abc', b'123')
+ check_format("repr= ab",
+ b'repr=%5.2V', 'abc', b'123')
+ check_format("repr= 123",
+ b'repr=%5V', None, b'123')
+ check_format("repr=12",
+ b'repr=%.2V', None, b'123')
+ check_format("repr= 12",
+ b'repr=%5.2V', None, b'123')
# test integer formats (%i, %d, %u)
check_format('010',
@@ -2125,8 +2420,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010i', c_int(123))
check_format('123'.rjust(100),
b'%100i', c_int(123))
- check_format('123'.rjust(300, '0'),
- b'%.300i', c_int(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100i', c_int(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80i', c_int(123))
@@ -2134,8 +2429,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010u', c_uint(123))
check_format('123'.rjust(100),
b'%100u', c_uint(123))
- check_format('123'.rjust(300, '0'),
- b'%.300u', c_uint(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100u', c_uint(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80u', c_uint(123))
@@ -2143,8 +2438,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010x', c_int(0x123))
check_format('123'.rjust(100),
b'%100x', c_int(0x123))
- check_format('123'.rjust(300, '0'),
- b'%.300x', c_int(0x123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100x', c_int(0x123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80x', c_int(0x123))
@@ -2303,6 +2598,80 @@ class UnicodeTest(string_tests.CommonTest,
self.assertNotEqual(abc, abcdef)
self.assertEqual(abcdef.decode('unicode_internal'), text)
+ def test_compare(self):
+ # Issue #17615: compare ASCII, latin1, BMP and astral strings with each other
+ N = 10
+ ascii = 'a' * N
+ ascii2 = 'z' * N
+ latin = '\x80' * N
+ latin2 = '\xff' * N
+ bmp = '\u0100' * N
+ bmp2 = '\uffff' * N
+ astral = '\U00100000' * N
+ astral2 = '\U0010ffff' * N
+ strings = (
+ ascii, ascii2,
+ latin, latin2,
+ bmp, bmp2,
+ astral, astral2)
+ for text1, text2 in itertools.product(strings, repeat=2):
+ equal = (text1 is text2)
+ self.assertEqual(text1 == text2, equal)
+ self.assertEqual(text1 != text2, not equal)
+
+ if equal:
+ self.assertTrue(text1 <= text2)
+ self.assertTrue(text1 >= text2)
+
+ # text1 is text2: duplicate strings to skip the "str1 == str2"
+ # optimization in unicode_compare_eq() and really compare
+ # character per character
+ copy1 = duplicate_string(text1)
+ copy2 = duplicate_string(text2)
+ self.assertIsNot(copy1, copy2)
+
+ self.assertTrue(copy1 == copy2)
+ self.assertFalse(copy1 != copy2)
+
+ self.assertTrue(copy1 <= copy2)
+ self.assertTrue(copy1 >= copy2)
+
+ self.assertTrue(ascii < ascii2)
+ self.assertTrue(ascii < latin)
+ self.assertTrue(ascii < bmp)
+ self.assertTrue(ascii < astral)
+ self.assertFalse(ascii >= ascii2)
+ self.assertFalse(ascii >= latin)
+ self.assertFalse(ascii >= bmp)
+ self.assertFalse(ascii >= astral)
+
+ self.assertFalse(latin < ascii)
+ self.assertTrue(latin < latin2)
+ self.assertTrue(latin < bmp)
+ self.assertTrue(latin < astral)
+ self.assertTrue(latin >= ascii)
+ self.assertFalse(latin >= latin2)
+ self.assertFalse(latin >= bmp)
+ self.assertFalse(latin >= astral)
+
+ self.assertFalse(bmp < ascii)
+ self.assertFalse(bmp < latin)
+ self.assertTrue(bmp < bmp2)
+ self.assertTrue(bmp < astral)
+ self.assertTrue(bmp >= ascii)
+ self.assertTrue(bmp >= latin)
+ self.assertFalse(bmp >= bmp2)
+ self.assertFalse(bmp >= astral)
+
+ self.assertFalse(astral < ascii)
+ self.assertFalse(astral < latin)
+ self.assertFalse(astral < bmp2)
+ self.assertTrue(astral < astral2)
+ self.assertTrue(astral >= ascii)
+ self.assertTrue(astral >= latin)
+ self.assertTrue(astral >= bmp2)
+ self.assertFalse(astral >= astral2)
+
@support.cpython_only
def test_pep393_utf8_caching_bug(self):
# Issue #25709: Problem with string concatenation and utf-8 cache