gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537)

author: Inada Naoki <songofacandy@gmail.com> 2022-05-12 05:48:38 (GMT)
committer: GitHub <noreply@github.com> 2022-05-12 05:48:38 (GMT)
commit: f9c9354a7a173eaca2aa19e667b5cf12167b7fed (patch)
tree: eb0fdd3219f53c973f1a7dbbcb9f8b0e0babdf36 /Tools
parent: 68fec31364e96d122aae0571c14683b4ddb0ebd0 (diff)
download: cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.zip
cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.gz
cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.bz2
3 files changed, 14 insertions, 65 deletions
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index 5ad4f87..53e29df 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -3526,9 +3526,7 @@ class Py_UNICODE_converter(CConverter):
     def cleanup(self):
         if not self.length:
             return """\
-#if !USE_UNICODE_WCHAR_CACHE
 PyMem_Free((void *){name});
-#endif /* USE_UNICODE_WCHAR_CACHE */
 """.format(name=self.name)
 
     def parse_arg(self, argname, argnum):
@@ -3539,11 +3537,7 @@ PyMem_Free((void *){name});
                         _PyArg_BadArgument("{{name}}", {argnum}, "str", {argname});
                         goto exit;
                     }}}}
-                    #if USE_UNICODE_WCHAR_CACHE
-                    {paramname} = _PyUnicode_AsUnicode({argname});
-                    #else /* USE_UNICODE_WCHAR_CACHE */
                     {paramname} = PyUnicode_AsWideCharString({argname}, NULL);
-                    #endif /* USE_UNICODE_WCHAR_CACHE */
                     if ({paramname} == NULL) {{{{
                         goto exit;
                     }}}}
@@ -3554,11 +3548,7 @@ PyMem_Free((void *){name});
                         {paramname} = NULL;
                     }}}}
                     else if (PyUnicode_Check({argname})) {{{{
-                        #if USE_UNICODE_WCHAR_CACHE
-                        {paramname} = _PyUnicode_AsUnicode({argname});
-                        #else /* USE_UNICODE_WCHAR_CACHE */
                         {paramname} = PyUnicode_AsWideCharString({argname}, NULL);
-                        #endif /* USE_UNICODE_WCHAR_CACHE */
                         if ({paramname} == NULL) {{{{
                             goto exit;
                         }}}}
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 857e52f..80563ea 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1376,57 +1376,28 @@ class PyUnicodeObjectPtr(PyObjectPtr):
         return _type_Py_UNICODE.sizeof
 
     def proxyval(self, visited):
-        may_have_surrogates = False
         compact = self.field('_base')
         ascii = compact['_base']
         state = ascii['state']
         is_compact_ascii = (int(state['ascii']) and int(state['compact']))
-        if not int(state['ready']):
-            # string is not ready
-            field_length = int(compact['wstr_length'])
-            may_have_surrogates = True
-            field_str = ascii['wstr']
+        field_length = int(ascii['length'])
+        if is_compact_ascii:
+            field_str = ascii.address + 1
+        elif int(state['compact']):
+            field_str = compact.address + 1
         else:
-            field_length = int(ascii['length'])
-            if is_compact_ascii:
-                field_str = ascii.address + 1
-            elif int(state['compact']):
-                field_str = compact.address + 1
-            else:
-                field_str = self.field('data')['any']
-            repr_kind = int(state['kind'])
-            if repr_kind == 1:
-                field_str = field_str.cast(_type_unsigned_char_ptr())
-            elif repr_kind == 2:
-                field_str = field_str.cast(_type_unsigned_short_ptr())
-            elif repr_kind == 4:
-                field_str = field_str.cast(_type_unsigned_int_ptr())
+            field_str = self.field('data')['any']
+        repr_kind = int(state['kind'])
+        if repr_kind == 1:
+            field_str = field_str.cast(_type_unsigned_char_ptr())
+        elif repr_kind == 2:
+            field_str = field_str.cast(_type_unsigned_short_ptr())
+        elif repr_kind == 4:
+            field_str = field_str.cast(_type_unsigned_int_ptr())
 
         # Gather a list of ints from the Py_UNICODE array; these are either
         # UCS-1, UCS-2 or UCS-4 code points:
-        if not may_have_surrogates:
-            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
-        else:
-            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
-            # inferior process: we must join surrogate pairs.
-            Py_UNICODEs = []
-            i = 0
-            limit = safety_limit(field_length)
-            while i < limit:
-                ucs = int(field_str[i])
-                i += 1
-                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
-                    Py_UNICODEs.append(ucs)
-                    continue
-                # This could be a surrogate pair.
-                ucs2 = int(field_str[i])
-                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
-                    continue
-                code = (ucs & 0x03FF) << 10
-                code |= ucs2 & 0x03FF
-                code += 0x00010000
-                Py_UNICODEs.append(code)
-                i += 1
+        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
 
         # Convert the int code points to unicode characters, and generate a
         # local unicode instance.
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index 5ee6c2f..ac20767 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -200,7 +200,6 @@ class Printer:
                         self.write(".kind = 1,")
                         self.write(".compact = 1,")
                         self.write(".ascii = 1,")
-                        self.write(".ready = 1,")
                 self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
                 return f"& {name}._ascii.ob_base"
             else:
@@ -213,21 +212,10 @@ class Printer:
                             self.write(f".kind = {kind},")
                             self.write(".compact = 1,")
                             self.write(".ascii = 0,")
-                            self.write(".ready = 1,")
                 with self.block(f"._data =", ","):
                     for i in range(0, len(s), 16):
                         data = s[i:i+16]
                         self.write(", ".join(map(str, map(ord, data))) + ",")
-                if kind == PyUnicode_2BYTE_KIND:
-                    self.patchups.append("if (sizeof(wchar_t) == 2) {")
-                    self.patchups.append(f"    {name}._compact._base.wstr = (wchar_t *) {name}._data;")
-                    self.patchups.append(f"    {name}._compact.wstr_length = {len(s)};")
-                    self.patchups.append("}")
-                if kind == PyUnicode_4BYTE_KIND:
-                    self.patchups.append("if (sizeof(wchar_t) == 4) {")
-                    self.patchups.append(f"    {name}._compact._base.wstr = (wchar_t *) {name}._data;")
-                    self.patchups.append(f"    {name}._compact.wstr_length = {len(s)};")
-                    self.patchups.append("}")
                 return f"& {name}._compact._base.ob_base"
author	Inada Naoki <songofacandy@gmail.com>	2022-05-12 05:48:38 (GMT)
committer	GitHub <noreply@github.com>	2022-05-12 05:48:38 (GMT)
commit	f9c9354a7a173eaca2aa19e667b5cf12167b7fed (patch)
tree	eb0fdd3219f53c973f1a7dbbcb9f8b0e0babdf36 /Tools
parent	68fec31364e96d122aae0571c14683b4ddb0ebd0 (diff)
download	cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.zip cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.gz cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.bz2