summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r--Lib/test/test_unicode.py630
1 files changed, 516 insertions, 114 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 21005eb..ef0fd1c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -5,17 +5,13 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
+import _string
import codecs
import struct
import sys
import unittest
import warnings
from test import support, string_tests
-import _string
-
-# decorator to skip tests on narrow builds
-requires_wide_build = unittest.skipIf(sys.maxunicode == 65535,
- 'requires wide build')
# Error handling (bad decoder return)
def search_function(encoding):
@@ -37,7 +33,8 @@ codecs.register(search_function)
class UnicodeTest(string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest,
- string_tests.MixinStrUnicodeTest):
+ string_tests.MixinStrUnicodeTest,
+ unittest.TestCase):
type2test = str
@@ -175,6 +172,15 @@ class UnicodeTest(string_tests.CommonTest,
def test_find(self):
string_tests.CommonTest.test_find(self)
+ # test implementation details of the memchr fast path
+ self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102')
+ self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201')
+ self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0120')
+ self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0220')
+ self.checkequal(100, 'a' * 100 + '\U00100304', 'find', '\U00100304')
+ self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00100204')
+ self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00102004')
+ # check mixed argument types
self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc')
self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1)
self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
@@ -184,6 +190,14 @@ class UnicodeTest(string_tests.CommonTest,
def test_rfind(self):
string_tests.CommonTest.test_rfind(self)
+ # test implementation details of the memrchr fast path
+ self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102')
+ self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201')
+ self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0120')
+ self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0220')
+ self.checkequal(0, '\U00100304' + 'a' * 100, 'rfind', '\U00100304')
+ self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', '\U00100204')
+ self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', '\U00102004')
# check mixed argument types
self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
@@ -280,6 +294,12 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
self.assertRaises(TypeError, 'replace'.replace, "r", 42)
+ @support.cpython_only
+ def test_replace_id(self):
+ pattern = 'abc'
+ text = 'abc def'
+ self.assertIs(text.replace(pattern, pattern), text)
+
def test_bytes_comparison(self):
with support.check_warnings():
warnings.simplefilter('ignore', BytesWarning)
@@ -350,6 +370,8 @@ class UnicodeTest(string_tests.CommonTest,
def test_islower(self):
string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
self.checkequalnofix(False, '\u1FFc', 'islower')
+ self.assertFalse('\u2167'.islower())
+ self.assertTrue('\u2177'.islower())
# non-BMP, uppercase
self.assertFalse('\U00010401'.islower())
self.assertFalse('\U00010427'.islower())
@@ -364,6 +386,8 @@ class UnicodeTest(string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
if not sys.platform.startswith('java'):
self.checkequalnofix(False, '\u1FFc', 'isupper')
+ self.assertTrue('\u2167'.isupper())
+ self.assertFalse('\u2177'.isupper())
# non-BMP, uppercase
self.assertTrue('\U00010401'.isupper())
self.assertTrue('\U00010427'.isupper())
@@ -520,7 +544,6 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
- @requires_wide_build
def test_lower(self):
string_tests.CommonTest.test_lower(self)
self.assertEqual('\U00010427'.lower(), '\U0001044F')
@@ -530,8 +553,28 @@ class UnicodeTest(string_tests.CommonTest,
'\U0001044F\U0001044F')
self.assertEqual('X\U00010427x\U0001044F'.lower(),
'x\U0001044Fx\U0001044F')
+ self.assertEqual('fi'.lower(), 'fi')
+ self.assertEqual('\u0130'.lower(), '\u0069\u0307')
+ # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+ self.assertEqual('\u03a3'.lower(), '\u03c3')
+ self.assertEqual('\u0345\u03a3'.lower(), '\u0345\u03c3')
+ self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+ self.assertEqual('A\u0345\u03a3a'.lower(), 'a\u0345\u03c3a')
+ self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+ self.assertEqual('A\u03a3\u0345'.lower(), 'a\u03c2\u0345')
+ self.assertEqual('\u03a3\u0345 '.lower(), '\u03c3\u0345 ')
+ self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
+ self.assertEqual('\u2177'.lower(), '\u2177')
+
+ def test_casefold(self):
+ self.assertEqual('hello'.casefold(), 'hello')
+ self.assertEqual('hELlo'.casefold(), 'hello')
+ self.assertEqual('ß'.casefold(), 'ss')
+ self.assertEqual('fi'.casefold(), 'fi')
+ self.assertEqual('\u03a3'.casefold(), '\u03c3')
+ self.assertEqual('A\u0345\u03a3'.casefold(), 'a\u03b9\u03c3')
+ self.assertEqual('\u00b5'.casefold(), '\u03bc')
- @requires_wide_build
def test_upper(self):
string_tests.CommonTest.test_upper(self)
self.assertEqual('\U0001044F'.upper(), '\U00010427')
@@ -541,8 +584,14 @@ class UnicodeTest(string_tests.CommonTest,
'\U00010427\U00010427')
self.assertEqual('X\U00010427x\U0001044F'.upper(),
'X\U00010427X\U00010427')
+ self.assertEqual('fi'.upper(), 'FI')
+ self.assertEqual('\u0130'.upper(), '\u0130')
+ self.assertEqual('\u03a3'.upper(), '\u03a3')
+ self.assertEqual('ß'.upper(), 'SS')
+ self.assertEqual('\u1fd2'.upper(), '\u0399\u0308\u0300')
+ self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')
+ self.assertEqual('\u2177'.upper(), '\u2167')
- @requires_wide_build
def test_capitalize(self):
string_tests.CommonTest.test_capitalize(self)
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
@@ -554,8 +603,12 @@ class UnicodeTest(string_tests.CommonTest,
'\U00010427\U0001044F')
self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
'X\U0001044Fx\U0001044F')
+ self.assertEqual('h\u0130'.capitalize(), 'H\u0069\u0307')
+ exp = '\u0399\u0308\u0300\u0069\u0307'
+ self.assertEqual('\u1fd2\u0130'.capitalize(), exp)
+ self.assertEqual('finnish'.capitalize(), 'FInnish')
+ self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
- @requires_wide_build
def test_title(self):
string_tests.MixinStrUnicodeUserStringTest.test_title(self)
self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -569,8 +622,10 @@ class UnicodeTest(string_tests.CommonTest,
'\U00010427\U0001044F \U00010427\U0001044F')
self.assertEqual('X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
+ self.assertEqual('fiNNISH'.title(), 'Finnish')
+ self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
+ self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
- @requires_wide_build
def test_swapcase(self):
string_tests.CommonTest.test_swapcase(self)
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
@@ -583,6 +638,34 @@ class UnicodeTest(string_tests.CommonTest,
'\U00010427\U0001044F')
self.assertEqual('X\U00010427x\U0001044F'.swapcase(),
'x\U0001044FX\U00010427')
+ self.assertEqual('fi'.swapcase(), 'FI')
+ self.assertEqual('\u0130'.swapcase(), '\u0069\u0307')
+ # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+ self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+ self.assertEqual('\u0345\u03a3'.swapcase(), '\u0399\u03c3')
+ self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0399\u03c2')
+ self.assertEqual('A\u0345\u03a3a'.swapcase(), 'a\u0399\u03c3A')
+ self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0399\u03c2')
+ self.assertEqual('A\u03a3\u0345'.swapcase(), 'a\u03c2\u0399')
+ self.assertEqual('\u03a3\u0345 '.swapcase(), '\u03c3\u0399 ')
+ self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+ self.assertEqual('ß'.swapcase(), 'SS')
+ self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300')
+
+ def test_center(self):
+ string_tests.CommonTest.test_center(self)
+ self.assertEqual('x'.center(2, '\U0010FFFF'),
+ 'x\U0010FFFF')
+ self.assertEqual('x'.center(3, '\U0010FFFF'),
+ '\U0010FFFFx\U0010FFFF')
+ self.assertEqual('x'.center(4, '\U0010FFFF'),
+ '\U0010FFFFx\U0010FFFF\U0010FFFF')
+
+ @unittest.skipUnless(sys.maxsize == 2**31 - 1, "requires 32-bit system")
+ @support.cpython_only
+ def test_case_operation_overflow(self):
+ # Issue #22643
+ self.assertRaises(OverflowError, ("ü"*(2**32//12 + 1)).upper)
def test_contains(self):
# Testing Unicode contains method
@@ -615,6 +698,17 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(TypeError, "abc".__contains__)
+ def test_issue18183(self):
+ '\U00010000\U00100000'.lower()
+ '\U00010000\U00100000'.casefold()
+ '\U00010000\U00100000'.upper()
+ '\U00010000\U00100000'.capitalize()
+ '\U00010000\U00100000'.title()
+ '\U00010000\U00100000'.swapcase()
+ '\U00100000'.center(3, '\U00010000')
+ '\U00100000'.ljust(3, '\U00010000')
+ '\U00100000'.rjust(3, '\U00010000')
+
def test_format(self):
self.assertEqual(''.format(), '')
self.assertEqual('a'.format(), 'a')
@@ -776,7 +870,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('{0!s}'.format(G('data')), 'string is data')
msg = 'object.__format__ with a non-empty format string is deprecated'
- with support.check_warnings((msg, PendingDeprecationWarning)):
+ with support.check_warnings((msg, DeprecationWarning)):
self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ')
self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ')
self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
@@ -858,6 +952,17 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(ValueError, format, '', '#')
self.assertRaises(ValueError, format, '', '#20')
+ # Non-ASCII
+ self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
+ 'ABC\u0410\u0411\u0412')
+ self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
+ 'ABC')
+ self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
+ '')
+
+ self.assertEqual("{[{}]}".format({"{}": 5}), "5")
+ self.assertEqual("0x{:0{:d}X}".format(0x0,16), "0x0000000000000000")
+
def test_format_map(self):
self.assertEqual(''.format_map({}), '')
self.assertEqual('a'.format_map({}), 'a')
@@ -1005,8 +1110,17 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('%f' % INF, 'inf')
self.assertEqual('%F' % INF, 'INF')
- @support.cpython_only
+ # PEP 393
+ self.assertEqual('%.1s' % "a\xe9\u20ac", 'a')
+ self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9')
+
def test_formatting_huge_precision(self):
+ format_string = "%.{}f".format(sys.maxsize + 1)
+ with self.assertRaises(ValueError):
+ result = format_string % 2.34
+
+ @support.cpython_only
+ def test_formatting_huge_precision_c_limits(self):
from _testcapi import INT_MAX
format_string = "%.{}f".format(INT_MAX + 1)
with self.assertRaises(ValueError):
@@ -1041,10 +1155,13 @@ class UnicodeTest(string_tests.CommonTest,
class UnicodeSubclass(str):
pass
- self.assertEqual(
- str(UnicodeSubclass('unicode subclass becomes unicode')),
- 'unicode subclass becomes unicode'
- )
+ for text in ('ascii', '\xe9', '\u20ac', '\U0010FFFF'):
+ subclass = UnicodeSubclass(text)
+ self.assertEqual(str(subclass), text)
+ self.assertEqual(len(subclass), len(text))
+ if text == 'ascii':
+ self.assertEqual(subclass.encode('ascii'), b'ascii')
+ self.assertEqual(subclass.encode('utf-8'), b'ascii')
self.assertEqual(
str('strings are converted to unicode'),
@@ -1170,15 +1287,12 @@ class UnicodeTest(string_tests.CommonTest,
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
- if sys.maxunicode == 65535:
- self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
- self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
+ self.assertEqual('\U00010002'.encode('utf-8'), b'\xf0\x90\x80\x82')
+ self.assertEqual('\U00023456'.encode('utf-8'), b'\xf0\xa3\x91\x96')
self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
- if sys.maxunicode == 65535:
- self.assertEqual(
- ('\ud800\udc02'*1000).encode('utf-8'),
- b'\xf0\x90\x80\x82'*1000)
+ self.assertEqual(('\U00010002'*10).encode('utf-8'),
+ b'\xf0\x90\x80\x82'*10)
self.assertEqual(
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
@@ -1290,7 +1404,7 @@ class UnicodeTest(string_tests.CommonTest,
# with start byte of a 2-byte sequence
(b'\xc2', FFFD), # only the start byte
(b'\xc2\xc2', FFFD*2), # 2 start bytes
- (b'\xc2\xc2\xc2', FFFD*3), # 2 start bytes
+ (b'\xc2\xc2\xc2', FFFD*3), # 3 start bytes
(b'\xc2\x41', FFFD+'A'), # invalid continuation byte
# with start byte of a 3-byte sequence
(b'\xe1', FFFD), # only the start byte
@@ -1360,6 +1474,226 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(seq.decode('utf-8', 'ignore'),
res.replace('\uFFFD', ''))
+ def to_bytestring(self, seq):
+ return bytes(int(c, 16) for c in seq.split())
+
+ def assertCorrectUTF8Decoding(self, seq, res, err):
+ """
+ Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when
+ 'strict' is used, returns res when 'replace' is used, and returns the
+ input with the invalid sequence removed when 'ignore' is used.
+ """
+ with self.assertRaises(UnicodeDecodeError) as cm:
+ seq.decode('utf-8')
+ exc = cm.exception
+
+ self.assertIn(err, str(exc))
+ self.assertEqual(seq.decode('utf-8', 'replace'), res)
+ self.assertEqual((b'aaaa' + seq + b'bbbb').decode('utf-8', 'replace'),
+ 'aaaa' + res + 'bbbb')
+ res = res.replace('\ufffd', '')
+ self.assertEqual(seq.decode('utf-8', 'ignore'), res)
+ self.assertEqual((b'aaaa' + seq + b'bbbb').decode('utf-8', 'ignore'),
+ 'aaaa' + res + 'bbbb')
+
+ def test_invalid_start_byte(self):
+ """
+ Test that an 'invalid start byte' error is raised when the first byte
+ is not in the ASCII range or is not a valid start byte of a 2-, 3-, or
+ 4-bytes sequence. The invalid start byte is replaced with a single
+ U+FFFD when errors='replace'.
+ E.g. <80> is a continuation byte and can appear only after a start byte.
+ """
+ FFFD = '\ufffd'
+ for byte in b'\x80\xA0\x9F\xBF\xC0\xC1\xF5\xFF':
+ self.assertCorrectUTF8Decoding(bytes([byte]), '\ufffd',
+ 'invalid start byte')
+
+ def test_unexpected_end_of_data(self):
+ """
+ Test that an 'unexpected end of data' error is raised when the string
+ ends after a start byte of a 2-, 3-, or 4-bytes sequence without having
+ enough continuation bytes. The incomplete sequence is replaced with a
+ single U+FFFD when errors='replace'.
+ E.g. in the sequence <F3 80 80>, F3 is the start byte of a 4-bytes
+ sequence, but it's followed by only 2 valid continuation bytes and the
+ last continuation byte is missing.
+ Note: the continuation bytes must be all valid, if one of them is
+ invalid another error will be raised.
+ """
+ sequences = [
+ 'C2', 'DF',
+ 'E0 A0', 'E0 BF', 'E1 80', 'E1 BF', 'EC 80', 'EC BF',
+ 'ED 80', 'ED 9F', 'EE 80', 'EE BF', 'EF 80', 'EF BF',
+ 'F0 90', 'F0 BF', 'F0 90 80', 'F0 90 BF', 'F0 BF 80', 'F0 BF BF',
+ 'F1 80', 'F1 BF', 'F1 80 80', 'F1 80 BF', 'F1 BF 80', 'F1 BF BF',
+ 'F3 80', 'F3 BF', 'F3 80 80', 'F3 80 BF', 'F3 BF 80', 'F3 BF BF',
+ 'F4 80', 'F4 8F', 'F4 80 80', 'F4 80 BF', 'F4 8F 80', 'F4 8F BF'
+ ]
+ FFFD = '\ufffd'
+ for seq in sequences:
+ self.assertCorrectUTF8Decoding(self.to_bytestring(seq), '\ufffd',
+ 'unexpected end of data')
+
+ def test_invalid_cb_for_2bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte of a 2-bytes sequence is invalid. The start byte
+ is replaced by a single U+FFFD and the second byte is handled
+ separately when errors='replace'.
+ E.g. in the sequence <C2 41>, C2 is the start byte of a 2-bytes
+ sequence, but 41 is not a valid continuation byte because it's the
+ ASCII letter 'A'.
+ """
+ FFFD = '\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('C2 00', FFFD+'\x00'), ('C2 7F', FFFD+'\x7f'),
+ ('C2 C0', FFFDx2), ('C2 FF', FFFDx2),
+ ('DF 00', FFFD+'\x00'), ('DF 7F', FFFD+'\x7f'),
+ ('DF C0', FFFDx2), ('DF FF', FFFDx2),
+ ]
+ for seq, res in sequences:
+ self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ 'invalid continuation byte')
+
+ def test_invalid_cb_for_3bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte(s) of a 3-bytes sequence are invalid. When
+ errors='replace', if the first continuation byte is valid, the first
+ two bytes (start byte + 1st cb) are replaced by a single U+FFFD and the
+ third byte is handled separately, otherwise only the start byte is
+ replaced with a U+FFFD and the other continuation bytes are handled
+ separately.
+ E.g. in the sequence <E1 80 41>, E1 is the start byte of a 3-bytes
+ sequence, 80 is a valid continuation byte, but 41 is not a valid cb
+ because it's the ASCII letter 'A'.
+ Note: when the start byte is E0 or ED, the valid ranges for the first
+ continuation byte are limited to A0..BF and 80..9F respectively.
+ Python 2 used to consider all the bytes in range 80..BF valid when the
+ start byte was ED. This is fixed in Python 3.
+ """
+ FFFD = '\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('E0 00', FFFD+'\x00'), ('E0 7F', FFFD+'\x7f'), ('E0 80', FFFDx2),
+ ('E0 9F', FFFDx2), ('E0 C0', FFFDx2), ('E0 FF', FFFDx2),
+ ('E0 A0 00', FFFD+'\x00'), ('E0 A0 7F', FFFD+'\x7f'),
+ ('E0 A0 C0', FFFDx2), ('E0 A0 FF', FFFDx2),
+ ('E0 BF 00', FFFD+'\x00'), ('E0 BF 7F', FFFD+'\x7f'),
+ ('E0 BF C0', FFFDx2), ('E0 BF FF', FFFDx2), ('E1 00', FFFD+'\x00'),
+ ('E1 7F', FFFD+'\x7f'), ('E1 C0', FFFDx2), ('E1 FF', FFFDx2),
+ ('E1 80 00', FFFD+'\x00'), ('E1 80 7F', FFFD+'\x7f'),
+ ('E1 80 C0', FFFDx2), ('E1 80 FF', FFFDx2),
+ ('E1 BF 00', FFFD+'\x00'), ('E1 BF 7F', FFFD+'\x7f'),
+ ('E1 BF C0', FFFDx2), ('E1 BF FF', FFFDx2), ('EC 00', FFFD+'\x00'),
+ ('EC 7F', FFFD+'\x7f'), ('EC C0', FFFDx2), ('EC FF', FFFDx2),
+ ('EC 80 00', FFFD+'\x00'), ('EC 80 7F', FFFD+'\x7f'),
+ ('EC 80 C0', FFFDx2), ('EC 80 FF', FFFDx2),
+ ('EC BF 00', FFFD+'\x00'), ('EC BF 7F', FFFD+'\x7f'),
+ ('EC BF C0', FFFDx2), ('EC BF FF', FFFDx2), ('ED 00', FFFD+'\x00'),
+ ('ED 7F', FFFD+'\x7f'),
+ ('ED A0', FFFDx2), ('ED BF', FFFDx2), # see note ^
+ ('ED C0', FFFDx2), ('ED FF', FFFDx2), ('ED 80 00', FFFD+'\x00'),
+ ('ED 80 7F', FFFD+'\x7f'), ('ED 80 C0', FFFDx2),
+ ('ED 80 FF', FFFDx2), ('ED 9F 00', FFFD+'\x00'),
+ ('ED 9F 7F', FFFD+'\x7f'), ('ED 9F C0', FFFDx2),
+ ('ED 9F FF', FFFDx2), ('EE 00', FFFD+'\x00'),
+ ('EE 7F', FFFD+'\x7f'), ('EE C0', FFFDx2), ('EE FF', FFFDx2),
+ ('EE 80 00', FFFD+'\x00'), ('EE 80 7F', FFFD+'\x7f'),
+ ('EE 80 C0', FFFDx2), ('EE 80 FF', FFFDx2),
+ ('EE BF 00', FFFD+'\x00'), ('EE BF 7F', FFFD+'\x7f'),
+ ('EE BF C0', FFFDx2), ('EE BF FF', FFFDx2), ('EF 00', FFFD+'\x00'),
+ ('EF 7F', FFFD+'\x7f'), ('EF C0', FFFDx2), ('EF FF', FFFDx2),
+ ('EF 80 00', FFFD+'\x00'), ('EF 80 7F', FFFD+'\x7f'),
+ ('EF 80 C0', FFFDx2), ('EF 80 FF', FFFDx2),
+ ('EF BF 00', FFFD+'\x00'), ('EF BF 7F', FFFD+'\x7f'),
+ ('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2),
+ ]
+ for seq, res in sequences:
+ self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ 'invalid continuation byte')
+
+ def test_invalid_cb_for_4bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte(s) of a 4-bytes sequence are invalid. When
+ errors='replace', the start byte and all the following valid
+ continuation bytes are replaced with a single U+FFFD, and all the bytes
+ starting from the first invalid continuation bytes (included) are
+ handled separately.
+ E.g. in the sequence <F1 80 41>, F1 is the start byte of a 4-bytes
+ sequence, 80 is a valid continuation byte, but 41 is not a valid cb
+ because it's the ASCII letter 'A'.
+ Note: when the start byte is F0 or F4, the valid ranges for the first
+ continuation byte are limited to 90..BF and 80..8F respectively, so
+ e.g. <F0 80> and <F4 90> are invalid even though 80 and 90 are valid
+ continuation bytes after the other 4-byte start bytes.
+ """
+ FFFD = '\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('F0 00', FFFD+'\x00'), ('F0 7F', FFFD+'\x7f'), ('F0 80', FFFDx2),
+ ('F0 8F', FFFDx2), ('F0 C0', FFFDx2), ('F0 FF', FFFDx2),
+ ('F0 90 00', FFFD+'\x00'), ('F0 90 7F', FFFD+'\x7f'),
+ ('F0 90 C0', FFFDx2), ('F0 90 FF', FFFDx2),
+ ('F0 BF 00', FFFD+'\x00'), ('F0 BF 7F', FFFD+'\x7f'),
+ ('F0 BF C0', FFFDx2), ('F0 BF FF', FFFDx2),
+ ('F0 90 80 00', FFFD+'\x00'), ('F0 90 80 7F', FFFD+'\x7f'),
+ ('F0 90 80 C0', FFFDx2), ('F0 90 80 FF', FFFDx2),
+ ('F0 90 BF 00', FFFD+'\x00'), ('F0 90 BF 7F', FFFD+'\x7f'),
+ ('F0 90 BF C0', FFFDx2), ('F0 90 BF FF', FFFDx2),
+ ('F0 BF 80 00', FFFD+'\x00'), ('F0 BF 80 7F', FFFD+'\x7f'),
+ ('F0 BF 80 C0', FFFDx2), ('F0 BF 80 FF', FFFDx2),
+ ('F0 BF BF 00', FFFD+'\x00'), ('F0 BF BF 7F', FFFD+'\x7f'),
+ ('F0 BF BF C0', FFFDx2), ('F0 BF BF FF', FFFDx2),
+ ('F1 00', FFFD+'\x00'), ('F1 7F', FFFD+'\x7f'), ('F1 C0', FFFDx2),
+ ('F1 FF', FFFDx2), ('F1 80 00', FFFD+'\x00'),
+ ('F1 80 7F', FFFD+'\x7f'), ('F1 80 C0', FFFDx2),
+ ('F1 80 FF', FFFDx2), ('F1 BF 00', FFFD+'\x00'),
+ ('F1 BF 7F', FFFD+'\x7f'), ('F1 BF C0', FFFDx2),
+ ('F1 BF FF', FFFDx2), ('F1 80 80 00', FFFD+'\x00'),
+ ('F1 80 80 7F', FFFD+'\x7f'), ('F1 80 80 C0', FFFDx2),
+ ('F1 80 80 FF', FFFDx2), ('F1 80 BF 00', FFFD+'\x00'),
+ ('F1 80 BF 7F', FFFD+'\x7f'), ('F1 80 BF C0', FFFDx2),
+ ('F1 80 BF FF', FFFDx2), ('F1 BF 80 00', FFFD+'\x00'),
+ ('F1 BF 80 7F', FFFD+'\x7f'), ('F1 BF 80 C0', FFFDx2),
+ ('F1 BF 80 FF', FFFDx2), ('F1 BF BF 00', FFFD+'\x00'),
+ ('F1 BF BF 7F', FFFD+'\x7f'), ('F1 BF BF C0', FFFDx2),
+ ('F1 BF BF FF', FFFDx2), ('F3 00', FFFD+'\x00'),
+ ('F3 7F', FFFD+'\x7f'), ('F3 C0', FFFDx2), ('F3 FF', FFFDx2),
+ ('F3 80 00', FFFD+'\x00'), ('F3 80 7F', FFFD+'\x7f'),
+ ('F3 80 C0', FFFDx2), ('F3 80 FF', FFFDx2),
+ ('F3 BF 00', FFFD+'\x00'), ('F3 BF 7F', FFFD+'\x7f'),
+ ('F3 BF C0', FFFDx2), ('F3 BF FF', FFFDx2),
+ ('F3 80 80 00', FFFD+'\x00'), ('F3 80 80 7F', FFFD+'\x7f'),
+ ('F3 80 80 C0', FFFDx2), ('F3 80 80 FF', FFFDx2),
+ ('F3 80 BF 00', FFFD+'\x00'), ('F3 80 BF 7F', FFFD+'\x7f'),
+ ('F3 80 BF C0', FFFDx2), ('F3 80 BF FF', FFFDx2),
+ ('F3 BF 80 00', FFFD+'\x00'), ('F3 BF 80 7F', FFFD+'\x7f'),
+ ('F3 BF 80 C0', FFFDx2), ('F3 BF 80 FF', FFFDx2),
+ ('F3 BF BF 00', FFFD+'\x00'), ('F3 BF BF 7F', FFFD+'\x7f'),
+ ('F3 BF BF C0', FFFDx2), ('F3 BF BF FF', FFFDx2),
+ ('F4 00', FFFD+'\x00'), ('F4 7F', FFFD+'\x7f'), ('F4 90', FFFDx2),
+ ('F4 BF', FFFDx2), ('F4 C0', FFFDx2), ('F4 FF', FFFDx2),
+ ('F4 80 00', FFFD+'\x00'), ('F4 80 7F', FFFD+'\x7f'),
+ ('F4 80 C0', FFFDx2), ('F4 80 FF', FFFDx2),
+ ('F4 8F 00', FFFD+'\x00'), ('F4 8F 7F', FFFD+'\x7f'),
+ ('F4 8F C0', FFFDx2), ('F4 8F FF', FFFDx2),
+ ('F4 80 80 00', FFFD+'\x00'), ('F4 80 80 7F', FFFD+'\x7f'),
+ ('F4 80 80 C0', FFFDx2), ('F4 80 80 FF', FFFDx2),
+ ('F4 80 BF 00', FFFD+'\x00'), ('F4 80 BF 7F', FFFD+'\x7f'),
+ ('F4 80 BF C0', FFFDx2), ('F4 80 BF FF', FFFDx2),
+ ('F4 8F 80 00', FFFD+'\x00'), ('F4 8F 80 7F', FFFD+'\x7f'),
+ ('F4 8F 80 C0', FFFDx2), ('F4 8F 80 FF', FFFDx2),
+ ('F4 8F BF 00', FFFD+'\x00'), ('F4 8F BF 7F', FFFD+'\x7f'),
+ ('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2)
+ ]
+ for seq, res in sequences:
+ self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ 'invalid continuation byte')
+
def test_codecs_idna(self):
# Test whether trailing dot is preserved
self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
@@ -1391,21 +1725,11 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(TypeError, str, b"hello", "test.unicode2")
self.assertRaises(TypeError, "hello".encode, "test.unicode1")
self.assertRaises(TypeError, "hello".encode, "test.unicode2")
- # executes PyUnicode_Encode()
- import imp
- self.assertRaises(
- ImportError,
- imp.find_module,
- "non-existing module",
- ["non-existing dir"]
- )
# Error handling (wrong arguments)
self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
# Error handling (lone surrogate in PyUnicode_TransformDecimalToASCII())
- self.assertRaises(UnicodeError, int, "\ud800")
- self.assertRaises(UnicodeError, int, "\udf00")
self.assertRaises(UnicodeError, float, "\ud800")
self.assertRaises(UnicodeError, float, "\udf00")
self.assertRaises(UnicodeError, complex, "\ud800")
@@ -1416,18 +1740,25 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('hello'.encode('ascii'), b'hello')
self.assertEqual('hello'.encode('utf-7'), b'hello')
self.assertEqual('hello'.encode('utf-8'), b'hello')
- self.assertEqual('hello'.encode('utf8'), b'hello')
+ self.assertEqual('hello'.encode('utf-8'), b'hello')
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), b'hello')
+ # Default encoding is utf-8
+ self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
# Roundtrip safety for BMP (just the first 1024 chars)
for c in range(1024):
u = chr(c)
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
'utf-16-be', 'raw_unicode_escape',
'unicode_escape', 'unicode_internal'):
- self.assertEqual(str(u.encode(encoding),encoding), u)
+ with warnings.catch_warnings():
+ # unicode-internal has been deprecated
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars)
for c in range(256):
@@ -1442,18 +1773,20 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for non-BMP (just a few chars)
- u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
- for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
- #'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
- self.assertEqual(str(u.encode(encoding),encoding), u)
+ with warnings.catch_warnings():
+ # unicode-internal has been deprecated
+ warnings.simplefilter("ignore", DeprecationWarning)
- # UTF-8 must be roundtrip safe for all UCS-2 code points
- # This excludes surrogates: in the full range, there would be
- # a surrogate pair (\udbff\udc00), which gets converted back
- # to a non-BMP character (\U0010fc00)
- u = ''.join(map(chr, list(range(0,0xd800)) +
- list(range(0xe000,0x10000))))
+ u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
+ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'raw_unicode_escape',
+ 'unicode_escape', 'unicode_internal'):
+ self.assertEqual(str(u.encode(encoding),encoding), u)
+
+ # UTF-8 must be roundtrip safe for all code points
+ # (except surrogates, which are forbidden).
+ u = ''.join(map(chr, list(range(0, 0xd800)) +
+ list(range(0xe000, 0x110000))))
for encoding in ('utf-8',):
self.assertEqual(str(u.encode(encoding),encoding), u)
@@ -1630,25 +1963,47 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
self.assertEqual(repr('\U00014000'), "'\\U00014000'") # nonprintable
+ # This test only affects 32-bit platforms because expandtabs can only take
+ # an int as the max value, not a 64-bit C long. If expandtabs is changed
+ # to take a 64-bit long, this test should apply to all platforms.
+ @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
+ 'only applies to 32-bit platforms')
def test_expandtabs_overflows_gracefully(self):
- # This test only affects 32-bit platforms because expandtabs can only take
- # an int as the max value, not a 64-bit C long. If expandtabs is changed
- # to take a 64-bit long, this test should apply to all platforms.
- if sys.maxsize > (1 << 32) or struct.calcsize('P') != 4:
- return
self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
+ @support.cpython_only
+ def test_expandtabs_optimization(self):
+ s = 'abc'
+ self.assertIs(s.expandtabs(), s)
+
def test_raiseMemError(self):
- # Ensure that the freelist contains a consistent object, even
- # when a string allocation fails with a MemoryError.
- # This used to crash the interpreter,
- # or leak references when the number was smaller.
- charwidth = 4 if sys.maxunicode >= 0x10000 else 2
- # Note: sys.maxsize is half of the actual max allocation because of
- # the signedness of Py_ssize_t.
- alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
- self.assertRaises(MemoryError, alloc)
- self.assertRaises(MemoryError, alloc)
+ if struct.calcsize('P') == 8:
+ # 64 bits pointers
+ ascii_struct_size = 48
+ compact_struct_size = 72
+ else:
+ # 32 bits pointers
+ ascii_struct_size = 24
+ compact_struct_size = 36
+
+ for char in ('a', '\xe9', '\u20ac', '\U0010ffff'):
+ code = ord(char)
+ if code < 0x100:
+ char_size = 1 # sizeof(Py_UCS1)
+ struct_size = ascii_struct_size
+ elif code < 0x10000:
+ char_size = 2 # sizeof(Py_UCS2)
+ struct_size = compact_struct_size
+ else:
+ char_size = 4 # sizeof(Py_UCS4)
+ struct_size = compact_struct_size
+ # Note: sys.maxsize is half of the actual max allocation because of
+ # the signedness of Py_ssize_t. Strings of maxlen-1 should in principle
+ # be allocatable, given enough memory.
+ maxlen = ((sys.maxsize - struct_size) // char_size)
+ alloc = lambda: char * maxlen
+ self.assertRaises(MemoryError, alloc)
+ self.assertRaises(MemoryError, alloc)
def test_format_subclass(self):
class S(str):
@@ -1665,10 +2020,7 @@ class UnicodeTest(string_tests.CommonTest,
pythonapi, py_object, sizeof,
c_int, c_long, c_longlong, c_ssize_t,
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
- if sys.maxunicode == 65535:
- name = "PyUnicodeUCS2_FromFormat"
- else:
- name = "PyUnicodeUCS4_FromFormat"
+ name = "PyUnicode_FromFormat"
_PyUnicode_FromFormat = getattr(pythonapi, name)
_PyUnicode_FromFormat.restype = py_object
@@ -1731,6 +2083,12 @@ class UnicodeTest(string_tests.CommonTest,
b'%0.4i', c_int(10))
check_format('-123',
b'%i', c_int(-123))
+ check_format('-123',
+ b'%li', c_long(-123))
+ check_format('-123',
+ b'%lli', c_longlong(-123))
+ check_format('-123',
+ b'%zi', c_ssize_t(-123))
check_format('-123',
b'%d', c_int(-123))
@@ -1767,8 +2125,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010i', c_int(123))
check_format('123'.rjust(100),
b'%100i', c_int(123))
- check_format('123'.rjust(100, '0'),
- b'%.100i', c_int(123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300i', c_int(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80i', c_int(123))
@@ -1776,8 +2134,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010u', c_uint(123))
check_format('123'.rjust(100),
b'%100u', c_uint(123))
- check_format('123'.rjust(100, '0'),
- b'%.100u', c_uint(123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300u', c_uint(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80u', c_uint(123))
@@ -1785,8 +2143,8 @@ class UnicodeTest(string_tests.CommonTest,
b'%010x', c_int(0x123))
check_format('123'.rjust(100),
b'%100x', c_int(0x123))
- check_format('123'.rjust(100, '0'),
- b'%.100x', c_int(0x123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300x', c_int(0x123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80x', c_int(0x123))
@@ -1816,10 +2174,11 @@ class UnicodeTest(string_tests.CommonTest,
b'%1abc')
check_format('%+i',
b'%+i', c_int(10))
- check_format('%s',
+ check_format('%.%s',
b'%.%s', b'abc')
# Test PyUnicode_AsWideChar()
+ @support.cpython_only
def test_aswidechar(self):
from _testcapi import unicode_aswidechar
support.import_module('ctypes')
@@ -1857,6 +2216,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(wchar, nonbmp + '\0')
# Test PyUnicode_AsWideCharString()
+ @support.cpython_only
def test_aswidecharstring(self):
from _testcapi import unicode_aswidecharstring
support.import_module('ctypes')
@@ -1879,6 +2239,87 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(size, nchar)
self.assertEqual(wchar, nonbmp + '\0')
+ def test_subclass_add(self):
+ class S(str):
+ def __add__(self, o):
+ return "3"
+ self.assertEqual(S("4") + S("5"), "3")
+ class S(str):
+ def __iadd__(self, o):
+ return "3"
+ s = S("1")
+ s += "4"
+ self.assertEqual(s, "3")
+
+ @support.cpython_only
+ def test_encode_decimal(self):
+ from _testcapi import unicode_encodedecimal
+ self.assertEqual(unicode_encodedecimal('123'),
+ b'123')
+ self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+ b'3.14')
+ self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ b' 3.14 ')
+ self.assertRaises(UnicodeEncodeError,
+ unicode_encodedecimal, "123\u20ac", "strict")
+ self.assertRaisesRegex(
+ ValueError,
+ "^'decimal' codec can't encode character",
+ unicode_encodedecimal, "123\u20ac", "replace")
+
+ @support.cpython_only
+ def test_transform_decimal(self):
+ from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+ self.assertEqual(transform_decimal('123'),
+ '123')
+ self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
+ '3.14')
+ self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ "\N{EM SPACE}3.14\N{EN SPACE}")
+ self.assertEqual(transform_decimal('123\u20ac'),
+ '123\u20ac')
+
+ def test_getnewargs(self):
+ text = 'abc'
+ args = text.__getnewargs__()
+ self.assertIsNot(args[0], text)
+ self.assertEqual(args[0], text)
+ self.assertEqual(len(args), 1)
+
+ def test_resize(self):
+ for length in range(1, 100, 7):
+ # generate a fresh string (refcount=1)
+ text = 'a' * length + 'b'
+
+ with support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
+ # fill wstr internal field
+ abc = text.encode('unicode_internal')
+ self.assertEqual(abc.decode('unicode_internal'), text)
+
+ # resize text: wstr field must be cleared and then recomputed
+ text += 'c'
+ abcdef = text.encode('unicode_internal')
+ self.assertNotEqual(abc, abcdef)
+ self.assertEqual(abcdef.decode('unicode_internal'), text)
+
+ @support.cpython_only
+ def test_pep393_utf8_caching_bug(self):
+ # Issue #25709: Problem with string concatenation and utf-8 cache
+ from _testcapi import getargs_s_hash
+ for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+ s = ''
+ for i in range(5):
+ # Due to CPython specific optimization the 's' string can be
+ # resized in-place.
+ s += chr(k)
+ # Parsing with the "s#" format code calls indirectly
+ # PyUnicode_AsUTF8AndSize() which creates the UTF-8
+ # encoded string cached in the Unicode object.
+ self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+ # Check that the second call returns the same result
+ self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
@@ -1930,45 +2371,6 @@ class StringModuleTest(unittest.TestCase):
]])
self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
- def test_encode_decimal(self):
- from _testcapi import unicode_encodedecimal
- self.assertEqual(unicode_encodedecimal('123'),
- b'123')
- self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
- b'3.14')
- self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
- b' 3.14 ')
- self.assertRaises(UnicodeEncodeError,
- unicode_encodedecimal, "123\u20ac", "strict")
- self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
- b'123?')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
- b'123')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
- b'123&#8364;')
- self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
- b'123\\u20ac')
- self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
- b'123? ')
- self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
- b'123??')
- self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
- b'123?0')
-
- def test_transform_decimal(self):
- from _testcapi import unicode_transformdecimaltoascii as transform_decimal
- self.assertEqual(transform_decimal('123'),
- '123')
- self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
- '3.14')
- self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
- "\N{EM SPACE}3.14\N{EN SPACE}")
- self.assertEqual(transform_decimal('123\u20ac'),
- '123\u20ac')
-
-
-def test_main():
- support.run_unittest(__name__)
if __name__ == "__main__":
- test_main()
+ unittest.main()