diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-12-02 23:04:37 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-12-02 23:04:37 (GMT) |
commit | 6648bf5661b79f5b40385b21570dff6f146c5eb5 (patch) | |
tree | 2bad11de9ee858dad1be1567c81097eb1573473a | |
parent | 38982e543a3ac61d30cfa98d36ab6e5ff7acc852 (diff) | |
parent | 7aa690860eceb74332c1edad1b8a7c4956bbdad5 (diff) | |
download | cpython-6648bf5661b79f5b40385b21570dff6f146c5eb5.zip cpython-6648bf5661b79f5b40385b21570dff6f146c5eb5.tar.gz cpython-6648bf5661b79f5b40385b21570dff6f146c5eb5.tar.bz2 |
Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
-rw-r--r-- | Lib/test/test_unicode.py | 17 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 5 |
3 files changed, 24 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index ce8df40..fac8b7b 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2702,6 +2702,23 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertTrue(astral >= bmp2)
         self.assertFalse(astral >= astral2)
 
+    @support.cpython_only
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        from _testcapi import getargs_s_hash
+        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+            s = ''
+            for i in range(5):
+                # Due to CPython specific optimization the 's' string can be
+                # resized in-place.
+                s += chr(k)
+                # Parsing with the "s#" format code calls indirectly
+                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
+                # encoded string cached in the Unicode object.
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+                # Check that the second call returns the same result
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: XXXX-XX-XX
 Core and Builtins
 -----------------
 
+- Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
+
 - Issue #24097: Fixed crash in object.__reduce__() if slot name is freed inside
   __getattr__.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8b885df..0b78301 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -722,6 +722,11 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
     }
     new_size = (struct_size + (length + 1) * char_size);
 
+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
     _Py_DEC_REFTOTAL;
     _Py_ForgetReference(unicode);
 
```