summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2011-09-28 05:41:54 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2011-09-28 05:41:54 (GMT)
commitd63a3b8beb4a0841cb59fb3515347ccaab34b733 (patch)
tree3b4e3cc63151c5a5a910c3550a190aefaea96ad4 /Lib
parent48d49497c50e79d14e9df9527d766ca3a0a38be5 (diff)
downloadcpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.zip
cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.gz
cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.bz2
Implement PEP 393.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/json/decoder.py3
-rw-r--r--Lib/test/json_tests/test_scanstring.py11
-rw-r--r--Lib/test/test_codeccallbacks.py7
-rw-r--r--Lib/test/test_codecs.py4
-rw-r--r--Lib/test/test_peepholer.py4
-rw-r--r--Lib/test/test_re.py7
-rw-r--r--Lib/test/test_sys.py38
-rw-r--r--Lib/test/test_unicode.py41
8 files changed, 79 insertions, 36 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index 3174e31..e7c0539 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -121,8 +121,7 @@ def py_scanstring(s, end, strict=True,
msg = "Invalid \\uXXXX escape"
raise ValueError(errmsg(msg, s, end))
uni = int(esc, 16)
- # Check for surrogate pair on UCS-4 systems
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+ if 0xd800 <= uni <= 0xdbff:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise ValueError(errmsg(msg, s, end))
diff --git a/Lib/test/json_tests/test_scanstring.py b/Lib/test/json_tests/test_scanstring.py
index f82cdee..426c8dd 100644
--- a/Lib/test/json_tests/test_scanstring.py
+++ b/Lib/test/json_tests/test_scanstring.py
@@ -9,14 +9,9 @@ class TestScanstring:
scanstring('"z\\ud834\\udd20x"', 1, True),
('z\U0001d120x', 16))
- if sys.maxunicode == 65535:
- self.assertEqual(
- scanstring('"z\U0001d120x"', 1, True),
- ('z\U0001d120x', 6))
- else:
- self.assertEqual(
- scanstring('"z\U0001d120x"', 1, True),
- ('z\U0001d120x', 5))
+ self.assertEqual(
+ scanstring('"z\U0001d120x"', 1, True),
+ ('z\U0001d120x', 5))
self.assertEqual(
scanstring('"\\u007b"', 1, True),
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index c5b1e25..317ae44 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1,5 +1,6 @@
import test.support, unittest
import sys, codecs, html.entities, unicodedata
+import ctypes
class PosReturn:
# this can be used for configurable callbacks
@@ -577,8 +578,10 @@ class CodecCallbackTest(unittest.TestCase):
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
- # 1 on UCS-4 builds, 2 on UCS-2
- len_wide = len("\U00010000")
+ if ctypes.sizeof(ctypes.c_wchar) == 2:
+ len_wide = 2
+ else:
+ len_wide = 1
self.assertEqual(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\U00010000",
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 7a9e38c..17038cb 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -622,6 +622,10 @@ class UTF8Test(ReadTest):
b"abc\xed\xa0\x80def")
self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
"abc\ud800def")
+ self.assertEqual("\U00010fff\uD800".encode("utf-8", "surrogatepass"),
+ b"\xf0\x90\xbf\xbf\xed\xa0\x80")
+ self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"),
+ "\U00010fff\uD800")
self.assertTrue(codecs.lookup_error("surrogatepass"))
class UTF7Test(ReadTest):
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index e0e3f63..1e782cf 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -218,10 +218,6 @@ class TestTranforms(unittest.TestCase):
# out of range
asm = dis_single('"fuu"[10]')
self.assertIn('BINARY_SUBSCR', asm)
- # non-BMP char (see #5057)
- asm = dis_single('"\U00012345"[0]')
- self.assertIn('BINARY_SUBSCR', asm)
-
def test_folding_of_unaryops_on_constants(self):
for line, elem in (
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index e3d10f7..d23c49b 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -780,6 +780,13 @@ class ReTests(unittest.TestCase):
self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
self.assertRaises(TypeError, _sre.compile, {}, 0, [])
+ def test_search_dot_unicode(self):
+ self.assertIsNotNone(re.search("123.*-", '123abc-'))
+ self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
+ self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
+ self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
+ self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 4355ef5..ed96d76 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -833,13 +833,39 @@ class SizeofTest(unittest.TestCase):
class newstyleclass(object): pass
check(newstyleclass, s)
# unicode
- usize = len('\0'.encode('unicode-internal'))
- samples = ['', '1'*100]
- # we need to test for both sizes, because we don't know if the string
- # has been cached
+ # each tuple contains a string and its expected character size
+ # don't put any static strings here, as they may contain
+ # wchar_t or UTF-8 representations
+ samples = ['1'*100, '\xff'*50,
+ '\u0100'*40, '\uffff'*100,
+ '\U00010000'*30, '\U0010ffff'*100]
+ asciifields = h + "PPiP"
+ compactfields = asciifields + "PPP"
+ unicodefields = compactfields + "P"
for s in samples:
- basicsize = size(h + 'PPPiP') + usize * (len(s) + 1)
- check(s, basicsize)
+ maxchar = ord(max(s))
+ if maxchar < 128:
+ L = size(asciifields) + len(s) + 1
+ elif maxchar < 256:
+ L = size(compactfields) + len(s) + 1
+ elif maxchar < 65536:
+ L = size(compactfields) + 2*(len(s) + 1)
+ else:
+ L = size(compactfields) + 4*(len(s) + 1)
+ check(s, L)
+ # verify that the UTF-8 size is accounted for
+ s = chr(0x4000) # 4 bytes canonical representation
+ check(s, size(compactfields) + 4)
+ try:
+ # FIXME: codecs.lookup(str) calls encoding.search_function() which
+ # calls __import__ using str in the module name. __import__ encodes
+ # the module name to the file system encoding (which is the locale
+ # encoding), so test_sys fails if the locale encoding is not UTF-8.
+ codecs.lookup(s) # produces 4 bytes UTF-8
+ except LookupError:
+ check(s, size(compactfields) + 4 + 4)
+ # TODO: add check that forces the presence of wchar_t representation
+ # TODO: add check that forces layout of unicodefields
# weakref
import weakref
check(weakref.ref(int), size(h + '2Pl2P'))
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index f7424c0..f256ba6 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1583,16 +1583,32 @@ class UnicodeTest(string_tests.CommonTest,
self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
def test_raiseMemError(self):
- # Ensure that the freelist contains a consistent object, even
- # when a string allocation fails with a MemoryError.
- # This used to crash the interpreter,
- # or leak references when the number was smaller.
- charwidth = 4 if sys.maxunicode >= 0x10000 else 2
- # Note: sys.maxsize is half of the actual max allocation because of
- # the signedness of Py_ssize_t.
- alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
- self.assertRaises(MemoryError, alloc)
- self.assertRaises(MemoryError, alloc)
+ if struct.calcsize('P') == 8:
+ # 64 bits pointers
+ ascii_struct_size = 64
+ compact_struct_size = 88
+ else:
+ # 32 bits pointers
+ ascii_struct_size = 32
+ compact_struct_size = 44
+
+ for char in ('a', '\xe9', '\u20ac', '\U0010ffff'):
+ code = ord(char)
+ if code < 0x100:
+ char_size = 1 # sizeof(Py_UCS1)
+ struct_size = ascii_struct_size
+ elif code < 0x10000:
+ char_size = 2 # sizeof(Py_UCS2)
+ struct_size = compact_struct_size
+ else:
+ char_size = 4 # sizeof(Py_UCS4)
+ struct_size = compact_struct_size
+ # Note: sys.maxsize is half of the actual max allocation because of
+ # the signedness of Py_ssize_t. -1 because of the null character.
+ maxlen = ((sys.maxsize - struct_size) // char_size) - 1
+ alloc = lambda: char * maxlen
+ self.assertRaises(MemoryError, alloc)
+ self.assertRaises(MemoryError, alloc)
def test_format_subclass(self):
class S(str):
@@ -1608,10 +1624,7 @@ class UnicodeTest(string_tests.CommonTest,
from ctypes import (pythonapi, py_object,
c_int, c_long, c_longlong, c_ssize_t,
c_uint, c_ulong, c_ulonglong, c_size_t)
- if sys.maxunicode == 65535:
- name = "PyUnicodeUCS2_FromFormat"
- else:
- name = "PyUnicodeUCS4_FromFormat"
+ name = "PyUnicode_FromFormat"
_PyUnicode_FromFormat = getattr(pythonapi, name)
_PyUnicode_FromFormat.restype = py_object