diff options
author | Inada Naoki <songofacandy@gmail.com> | 2019-03-18 06:44:11 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-18 06:44:11 (GMT) |
commit | 6a16b18224fa98f6d192aa5014affeccc0376eb3 (patch) | |
tree | d42d5fb270ce1a0e77235b9d5841fe2daa64b4e6 | |
parent | 6fb544d8bc994ceb96b0fc5059c65fa82997743e (diff) | |
download | cpython-6a16b18224fa98f6d192aa5014affeccc0376eb3.zip cpython-6a16b18224fa98f6d192aa5014affeccc0376eb3.tar.gz cpython-6a16b18224fa98f6d192aa5014affeccc0376eb3.tar.bz2 |
bpo-36297: remove "unicode_internal" codec (GH-12342)
-rw-r--r-- | Doc/library/codecs.rst | 14 | ||||
-rw-r--r-- | Doc/whatsnew/3.8.rst | 3 | ||||
-rw-r--r-- | Include/cpython/unicodeobject.h | 9 | ||||
-rw-r--r-- | Lib/encodings/unicode_internal.py | 45 | ||||
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 66 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 107 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 36 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst | 2 | ||||
-rw-r--r-- | Modules/_codecsmodule.c | 82 | ||||
-rw-r--r-- | Modules/clinic/_codecsmodule.c.h | 104 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 102 | ||||
-rw-r--r-- | PCbuild/lib.pyproj | 1 |
12 files changed, 41 insertions, 530 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 7cfec63..d2a0c8b 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1316,16 +1316,10 @@ encodings. | | | code actually uses UTF-8 | | | | by default. | +--------------------+---------+---------------------------+ -| unicode_internal | | Return the internal | -| | | representation of the | -| | | operand. Stateful codecs | -| | | are not supported. | -| | | | -| | | .. deprecated:: 3.3 | -| | | This representation is | -| | | obsoleted by | -| | | :pep:`393`. | -+--------------------+---------+---------------------------+ + +.. versionchanged:: 3.8 + "unicode_internal" codec is removed. + .. _binary-transforms: diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index eaa3f5b..31baccd 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8: * Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`. (Contributed by Serhiy Storchaka in :issue:`29209`.) +* "unicode_internal" codec is removed. + (Contributed by Inada Naoki in :issue:`36297`.) + Porting to Python 3.8 ===================== diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index c763490..4eecc96 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( Py_ssize_t length /* Number of Py_UNICODE chars to encode */ ) Py_DEPRECATED(3.3); -/* --- Unicode Internal Codec --------------------------------------------- */ - -/* Only for internal use in _codecsmodule.c */ -PyObject *_PyUnicode_DecodeUnicodeInternal( - const char *string, - Py_ssize_t length, - const char *errors - ); - /* --- Latin-1 Codecs ----------------------------------------------------- */ PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( diff --git a/Lib/encodings/unicode_internal.py b/Lib/encodings/unicode_internal.py deleted file mode 100644 index df3e775..0000000 --- a/Lib/encodings/unicode_internal.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Python 'unicode-internal' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.unicode_internal_encode - decode = codecs.unicode_internal_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_internal_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_internal_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-internal', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index e2e7463..585992b 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase): charmap[ord("?")] = "XYZ" # wrong type in mapping self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) - def test_decodeunicodeinternal(self): - with test.support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - self.assertRaises( - UnicodeDecodeError, - b"\x00\x00\x00\x00\x00".decode, - "unicode-internal", - ) - if len('\0'.encode('unicode-internal')) == 4: - def handler_unicodeinternal(exc): - if not isinstance(exc, UnicodeDecodeError): - raise TypeError("don't know how to handle %r" % exc) - return ("\x01", 1) - - self.assertEqual( - b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), - "\u0000" - ) - - self.assertEqual( - b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), - "\u0000\ufffd" - ) - - self.assertEqual( - b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"), - "\u0000\\x00" - ) - - codecs.register_error("test.hui", handler_unicodeinternal) - - self.assertEqual( - b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), - "\u0000\u0001\u0000" - ) - def test_callbacks(self): def handler1(exc): r = range(exc.start, exc.end) @@ -794,16 +758,13 @@ class CodecCallbackTest(unittest.TestCase): ("ascii", b"\xff"), ("utf-8", b"\xff"), ("utf-7", b"+x-"), - ("unicode-internal", b"\x00"), ): - with test.support.check_warnings(): - # unicode-internal has been deprecated - self.assertRaises( - TypeError, - bytes.decode, - enc, - "test.badhandler" - ) + self.assertRaises( + TypeError, + bytes.decode, + enc, + "test.badhandler" + ) def test_lookup(self): self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) @@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase): ("utf-32", b"\xff"), ("unicode-escape", b"\\u123g"), ("raw-unicode-escape", b"\\u123g"), - ("unicode-internal", b"\xff"), ] def replacing(exc): @@ -1024,11 +984,9 @@ class CodecCallbackTest(unittest.TestCase): raise TypeError("don't know how to handle %r" % exc) codecs.register_error("test.replacing", replacing) - with test.support.check_warnings(): - # unicode-internal has been deprecated - for (encoding, data) in baddata: - with self.assertRaises(TypeError): - data.decode(encoding, "test.replacing") + for (encoding, data) in baddata: + with self.assertRaises(TypeError): + data.decode(encoding, "test.replacing") def mutating(exc): if isinstance(exc, UnicodeDecodeError): @@ -1039,10 +997,8 @@ class CodecCallbackTest(unittest.TestCase): codecs.register_error("test.mutating", mutating) # If the decoder doesn't pick up the modified input the following # will lead to an endless loop - with test.support.check_warnings(): - # unicode-internal has been deprecated - for (encoding, data) in baddata: - self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") + for (encoding, data) in baddata: + self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") # issue32583 def test_crashing_decode_handler(self): diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 893212e..e8c7d76 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase): self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8)) -class RecodingTest(unittest.TestCase): - def test_recoding(self): - f = io.BytesIO() - with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2: - f2.write("a") - # Python used to crash on this at exit because of a refcount - # bug in _codecsmodule.c - - self.assertTrue(f.closed) - # From RFC 3492 punycode_testcases = [ # A Arabic (Egyptian): @@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase): self.assertEqual(uni, puny.decode("punycode")) -class UnicodeInternalTest(unittest.TestCase): - @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') - def test_bug1251300(self): - # Decoding with unicode_internal used to not correctly handle "code - # points" above 0x10ffff on UCS-4 builds. - ok = [ - (b"\x00\x10\xff\xff", "\U0010ffff"), - (b"\x00\x00\x01\x01", "\U00000101"), - (b"", ""), - ] - not_ok = [ - b"\x7f\xff\xff\xff", - b"\x80\x00\x00\x00", - b"\x81\x00\x00\x00", - b"\x00", - b"\x00\x00\x00\x00\x00", - ] - for internal, uni in ok: - if sys.byteorder == "little": - internal = bytes(reversed(internal)) - with support.check_warnings(): - self.assertEqual(uni, internal.decode("unicode_internal")) - for internal in not_ok: - if sys.byteorder == "little": - internal = bytes(reversed(internal)) - with support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - self.assertRaises(UnicodeDecodeError, internal.decode, - "unicode_internal") - if sys.byteorder == "little": - invalid = b"\x00\x00\x11\x00" - invalid_backslashreplace = r"\x00\x00\x11\x00" - else: - invalid = b"\x00\x11\x00\x00" - invalid_backslashreplace = r"\x00\x11\x00\x00" - with support.check_warnings(): - self.assertRaises(UnicodeDecodeError, - invalid.decode, "unicode_internal") - with support.check_warnings(): - self.assertEqual(invalid.decode("unicode_internal", "replace"), - '\ufffd') - with support.check_warnings(): - self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"), - invalid_backslashreplace) - - @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') - def test_decode_error_attributes(self): - try: - with support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") - except UnicodeDecodeError as ex: - self.assertEqual("unicode_internal", ex.encoding) - self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) - self.assertEqual(4, ex.start) - self.assertEqual(8, ex.end) - else: - self.fail() - - @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') - def test_decode_callback(self): - codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) - decoder = codecs.getdecoder("unicode_internal") - with support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - ab = "ab".encode("unicode_internal").decode() - ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), - "ascii"), - "UnicodeInternalTest") - self.assertEqual(("ab", 12), ignored) - - def test_encode_length(self): - with support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - # Issue 3739 - encoder = codecs.getencoder("unicode_internal") - self.assertEqual(encoder("a")[1], 1) - self.assertEqual(encoder("\xe9\u0142")[1], 2) - - self.assertEqual(codecs.escape_encode(br'\x00')[1], 4) - # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html nameprep_tests = [ # 3.1 Map to nothing. @@ -1949,7 +1858,6 @@ all_unicode_encodings = [ "shift_jisx0213", "tis_620", "unicode_escape", - "unicode_internal", "utf_16", "utf_16_be", "utf_16_le", @@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"): # The following encodings don't work in stateful mode broken_unicode_with_stateful = [ "punycode", - "unicode_internal" ] @@ -1984,12 +1891,10 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): name = "latin_1" self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-")) - with support.check_warnings(): - # unicode-internal has been deprecated - (b, size) = codecs.getencoder(encoding)(s) - self.assertEqual(size, len(s), "encoding=%r" % encoding) - (chars, size) = codecs.getdecoder(encoding)(b) - self.assertEqual(chars, s, "encoding=%r" % encoding) + (b, size) = codecs.getencoder(encoding)(s) + self.assertEqual(size, len(s), "encoding=%r" % encoding) + (chars, size) = codecs.getdecoder(encoding)(b) + self.assertEqual(chars, s, "encoding=%r" % encoding) if encoding not in broken_unicode_with_stateful: # check stream reader/writer @@ -2116,9 +2021,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): def test_bad_encode_args(self): for encoding in all_unicode_encodings: encoder = codecs.getencoder(encoding) - with support.check_warnings(): - # unicode-internal has been deprecated - self.assertRaises(TypeError, encoder) + self.assertRaises(TypeError, encoder) def test_encoding_map_type_initialized(self): from encodings import cp1140 diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c277e70..1131efd 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2104,12 +2104,8 @@ class UnicodeTest(string_tests.CommonTest, u = chr(c) for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'raw_unicode_escape', - 'unicode_escape', 'unicode_internal'): - with warnings.catch_warnings(): - # unicode-internal has been deprecated - warnings.simplefilter("ignore", DeprecationWarning) - - self.assertEqual(str(u.encode(encoding),encoding), u) + 'unicode_escape'): + self.assertEqual(str(u.encode(encoding),encoding), u) # Roundtrip safety for BMP (just the first 256 chars) for c in range(256): @@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest, # Roundtrip safety for non-BMP (just a few chars) with warnings.catch_warnings(): - # unicode-internal has been deprecated - warnings.simplefilter("ignore", DeprecationWarning) - u = '\U00010001\U00020002\U00030003\U00040004\U00050005' for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', - 'raw_unicode_escape', - 'unicode_escape', 'unicode_internal'): + 'raw_unicode_escape', 'unicode_escape'): self.assertEqual(str(u.encode(encoding),encoding), u) # UTF-8 must be roundtrip safe for all code points @@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual(args[0], text) self.assertEqual(len(args), 1) + @support.cpython_only def test_resize(self): + from _testcapi import getargs_u for length in range(1, 100, 7): # generate a fresh string (refcount=1) text = 'a' * length + 'b' - with support.check_warnings(('unicode_internal codec has been ' - 'deprecated', DeprecationWarning)): - # fill wstr internal field - abc = text.encode('unicode_internal') - self.assertEqual(abc.decode('unicode_internal'), text) - - # resize text: wstr field must be cleared and then recomputed - text += 'c' - abcdef = text.encode('unicode_internal') - self.assertNotEqual(abc, abcdef) - self.assertEqual(abcdef.decode('unicode_internal'), text) + # fill wstr internal field + abc = getargs_u(text) + self.assertEqual(abc, text) + + # resize text: wstr field must be cleared and then recomputed + text += 'c' + abcdef = getargs_u(text) + self.assertNotEqual(abc, abcdef) + self.assertEqual(abcdef, text) def test_compare(self): # Issue #17615 diff --git a/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst b/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst new file mode 100644 index 0000000..f633fee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst @@ -0,0 +1,2 @@ +"unicode_internal" codec is removed. It was deprecated since Python 3.3. +Patch by Inada Naoki. diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index e0d6902..90b3e37 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -21,8 +21,7 @@ (Unicode object, bytes consumed) These <encoding>s are available: utf_8, unicode_escape, - raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), - mbcs (on win32). + raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32). Written by Marc-Andre Lemburg (mal@lemburg.com). @@ -251,38 +250,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data, /* --- Decoder ------------------------------------------------------------ */ /*[clinic input] -_codecs.unicode_internal_decode - obj: object - errors: str(accept={str, NoneType}) = NULL - / -[clinic start generated code]*/ - -static PyObject * -_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj, - const char *errors) -/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/ -{ - if (PyUnicode_Check(obj)) { - if (PyUnicode_READY(obj) < 0) - return NULL; - Py_INCREF(obj); - return codec_tuple(obj, PyUnicode_GET_LENGTH(obj)); - } - else { - Py_buffer view; - PyObject *result; - if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0) - return NULL; - - result = codec_tuple( - _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors), - view.len); - PyBuffer_Release(&view); - return result; - } -} - -/*[clinic input] _codecs.utf_7_decode data: Py_buffer errors: str(accept={str, NoneType}) = NULL @@ -687,51 +654,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data, } /*[clinic input] -_codecs.unicode_internal_encode - obj: object - errors: str(accept={str, NoneType}) = NULL - / -[clinic start generated code]*/ - -static PyObject * -_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, - const char *errors) -/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/ -{ - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "unicode_internal codec has been deprecated", - 1)) - return NULL; - - if (PyUnicode_Check(obj)) { - Py_UNICODE *u; - Py_ssize_t len, size; - - if (PyUnicode_READY(obj) < 0) - return NULL; - - u = PyUnicode_AsUnicodeAndSize(obj, &len); - if (u == NULL) - return NULL; - if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - return PyErr_NoMemory(); - size = len * sizeof(Py_UNICODE); - return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), - PyUnicode_GET_LENGTH(obj)); - } - else { - Py_buffer view; - PyObject *result; - if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0) - return NULL; - result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), - view.len); - PyBuffer_Release(&view); - return result; - } -} - -/*[clinic input] _codecs.utf_7_encode str: unicode errors: str(accept={str, NoneType}) = NULL @@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = { _CODECS_UTF_32_EX_DECODE_METHODDEF _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF - _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF - _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF _CODECS_LATIN_1_ENCODE_METHODDEF diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index d1f4cf3..65e2483 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -370,57 +370,6 @@ exit: return return_value; } -PyDoc_STRVAR(_codecs_unicode_internal_decode__doc__, -"unicode_internal_decode($module, obj, errors=None, /)\n" -"--\n" -"\n"); - -#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF \ - {"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__}, - -static PyObject * -_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj, - const char *errors); - -static PyObject * -_codecs_unicode_internal_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) -{ - PyObject *return_value = NULL; - PyObject *obj; - const char *errors = NULL; - - if (!_PyArg_CheckPositional("unicode_internal_decode", nargs, 1, 2)) { - goto exit; - } - obj = args[0]; - if (nargs < 2) { - goto skip_optional; - } - if (args[1] == Py_None) { - errors = NULL; - } - else if (PyUnicode_Check(args[1])) { - Py_ssize_t errors_length; - errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); - if (errors == NULL) { - goto exit; - } - if (strlen(errors) != (size_t)errors_length) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - goto exit; - } - } - else { - _PyArg_BadArgument("unicode_internal_decode", 2, "str or None", args[1]); - goto exit; - } -skip_optional: - return_value = _codecs_unicode_internal_decode_impl(module, obj, errors); - -exit: - return return_value; -} - PyDoc_STRVAR(_codecs_utf_7_decode__doc__, "utf_7_decode($module, data, errors=None, final=False, /)\n" "--\n" @@ -1853,57 +1802,6 @@ exit: return return_value; } -PyDoc_STRVAR(_codecs_unicode_internal_encode__doc__, -"unicode_internal_encode($module, obj, errors=None, /)\n" -"--\n" -"\n"); - -#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF \ - {"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__}, - -static PyObject * -_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, - const char *errors); - -static PyObject * -_codecs_unicode_internal_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) -{ - PyObject *return_value = NULL; - PyObject *obj; - const char *errors = NULL; - - if (!_PyArg_CheckPositional("unicode_internal_encode", nargs, 1, 2)) { - goto exit; - } - obj = args[0]; - if (nargs < 2) { - goto skip_optional; - } - if (args[1] == Py_None) { - errors = NULL; - } - else if (PyUnicode_Check(args[1])) { - Py_ssize_t errors_length; - errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); - if (errors == NULL) { - goto exit; - } - if (strlen(errors) != (size_t)errors_length) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - goto exit; - } - } - else { - _PyArg_BadArgument("unicode_internal_encode", 2, "str or None", args[1]); - goto exit; - } -skip_optional: - return_value = _codecs_unicode_internal_encode_impl(module, obj, errors); - -exit: - return return_value; -} - PyDoc_STRVAR(_codecs_utf_7_encode__doc__, "utf_7_encode($module, str, errors=None, /)\n" "--\n" @@ -3024,4 +2922,4 @@ exit: #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=02bd0f0cf9a28150 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=da3c47709a55a05e input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8141ce7..b3a851a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, return result; } -/* --- Unicode Internal Codec ------------------------------------------- */ - -PyObject * -_PyUnicode_DecodeUnicodeInternal(const char *s, - Py_ssize_t size, - const char *errors) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - _PyUnicodeWriter writer; - const char *end; - const char *reason; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "unicode_internal codec has been deprecated", - 1)) - return NULL; - - if (size < 0) { - PyErr_BadInternalCall(); - return NULL; - } - if (size == 0) - _Py_RETURN_UNICODE_EMPTY(); - - _PyUnicodeWriter_Init(&writer); - if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) { - PyErr_NoMemory(); - goto onError; - } - writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE; - - end = s + size; - while (s < end) { - Py_UNICODE uch; - Py_UCS4 ch; - if (end - s < Py_UNICODE_SIZE) { - endinpos = end-starts; - reason = "truncated input"; - goto error; - } - /* We copy the raw representation one byte at a time because the - pointer may be unaligned (see test_codeccallbacks). */ - ((char *) &uch)[0] = s[0]; - ((char *) &uch)[1] = s[1]; -#ifdef Py_UNICODE_WIDE - ((char *) &uch)[2] = s[2]; - ((char *) &uch)[3] = s[3]; -#endif - ch = uch; -#ifdef Py_UNICODE_WIDE - /* We have to sanity check the raw data, otherwise doom looms for - some malformed UCS-4 data. */ - if (ch > 0x10ffff) { - endinpos = s - starts + Py_UNICODE_SIZE; - reason = "illegal code point (> 0x10FFFF)"; - goto error; - } -#endif - s += Py_UNICODE_SIZE; -#ifndef Py_UNICODE_WIDE - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE) - { - Py_UNICODE uch2; - ((char *) &uch2)[0] = s[0]; - ((char *) &uch2)[1] = s[1]; - if (Py_UNICODE_IS_LOW_SURROGATE(uch2)) - { - ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2); - s += Py_UNICODE_SIZE; - } - } -#endif - - if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) - goto onError; - continue; - - error: - startinpos = s - starts; - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "unicode_internal", reason, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &writer)) - goto onError; - } - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return _PyUnicodeWriter_Finish(&writer); - - onError: - _PyUnicodeWriter_Dealloc(&writer); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - /* --- Latin-1 Codec ------------------------------------------------------ */ PyObject * diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj index 701b55f..ffb95c6 100644 --- a/PCbuild/lib.pyproj +++ b/PCbuild/lib.pyproj @@ -392,7 +392,6 @@ <Compile Include="encodings\tis_620.py" /> <Compile Include="encodings\undefined.py" /> <Compile Include="encodings\unicode_escape.py" /> - <Compile Include="encodings\unicode_internal.py" /> <Compile Include="encodings\utf_16.py" /> <Compile Include="encodings\utf_16_be.py" /> <Compile Include="encodings\utf_16_le.py" /> |