From 5442561c1a094b68900198bade616da9ed509ac8 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 8 Jun 2022 20:18:08 +0200 Subject: gh-93575: Use correct way to calculate PyUnicode struct sizes (GH-93602) * gh-93575: Use correct way to calculate PyUnicode struct sizes * Add comment to keep test_sys and test_unicode in sync * Fix case code < 256 --- Lib/test/test_sys.py | 1 + Lib/test/test_unicode.py | 23 +++++++++++++++++----- .../2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst | 4 ++++ 3 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 94a09ff..1dc10d8 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1539,6 +1539,7 @@ class SizeofTest(unittest.TestCase): samples = ['1'*100, '\xff'*50, '\u0100'*40, '\uffff'*100, '\U00010000'*30, '\U0010ffff'*100] + # also update field definitions in test_unicode.test_raiseMemError asciifields = "nnb" compactfields = asciifields + "nP" unicodefields = compactfields + "P" diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 64abc0c..9765ed9 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2370,15 +2370,19 @@ class UnicodeTest(string_tests.CommonTest, self.assertIs(s.expandtabs(), s) def test_raiseMemError(self): - null_byte = 1 - ascii_struct_size = sys.getsizeof("a") - len("a") - null_byte - compact_struct_size = sys.getsizeof("\xff") - len("\xff") - null_byte + asciifields = "nnb" + compactfields = asciifields + "nP" + ascii_struct_size = support.calcobjsize(asciifields) + compact_struct_size = support.calcobjsize(compactfields) for char in ('a', '\xe9', '\u20ac', '\U0010ffff'): code = ord(char) - if code < 0x100: + if code < 0x80: char_size = 1 # sizeof(Py_UCS1) struct_size = ascii_struct_size + elif code < 0x100: + char_size = 1 # sizeof(Py_UCS1) + struct_size = compact_struct_size elif code < 0x10000: char_size = 2 # 
sizeof(Py_UCS2) struct_size = compact_struct_size @@ -2390,7 +2394,16 @@ class UnicodeTest(string_tests.CommonTest, # be allocatable, given enough memory. maxlen = ((sys.maxsize - struct_size) // char_size) alloc = lambda: char * maxlen - with self.subTest(char=char): + with self.subTest( + char=char, + struct_size=struct_size, + char_size=char_size + ): + # self-check + self.assertEqual( + sys.getsizeof(char * 42), + struct_size + (char_size * (42 + 1)) + ) self.assertRaises(MemoryError, alloc) self.assertRaises(MemoryError, alloc) diff --git a/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst new file mode 100644 index 0000000..98d1532 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst @@ -0,0 +1,4 @@ +Fix issue with test_unicode test_raiseMemError. The test case now uses +``test.support.calcobjsize`` to calculate the size of PyUnicode structs. +:func:`sys.getsizeof` may return a different size when a string has UTF-8 +memory. -- cgit v0.12