summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/codecs.rst 14
-rw-r--r-- Doc/whatsnew/3.8.rst 3
-rw-r--r-- Include/cpython/unicodeobject.h 9
-rw-r--r-- Lib/encodings/unicode_internal.py 45
-rw-r--r-- Lib/test/test_codeccallbacks.py 66
-rw-r--r-- Lib/test/test_codecs.py 107
-rw-r--r-- Lib/test/test_unicode.py 36
-rw-r--r-- Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst 2
-rw-r--r-- Modules/_codecsmodule.c 82
-rw-r--r-- Modules/clinic/_codecsmodule.c.h 104
-rw-r--r-- Objects/unicodeobject.c 102
-rw-r--r-- PCbuild/lib.pyproj 1
12 files changed, 41 insertions, 530 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 7cfec63..d2a0c8b 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1316,16 +1316,10 @@ encodings.
| | | code actually uses UTF-8 |
| | | by default. |
+--------------------+---------+---------------------------+
-| unicode_internal | | Return the internal |
-| | | representation of the |
-| | | operand. Stateful codecs |
-| | | are not supported. |
-| | | |
-| | | .. deprecated:: 3.3 |
-| | | This representation is |
-| | | obsoleted by |
-| | | :pep:`393`. |
-+--------------------+---------+---------------------------+
+
+.. versionchanged:: 3.8
+ The "unicode_internal" codec has been removed.
+
.. _binary-transforms:
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index eaa3f5b..31baccd 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
* Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
(Contributed by Serhiy Storchaka in :issue:`29209`.)
+* The "unicode_internal" codec has been removed.
+ (Contributed by Inada Naoki in :issue:`36297`.)
+
Porting to Python 3.8
=====================
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index c763490..4eecc96 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
) Py_DEPRECATED(3.3);
-/* --- Unicode Internal Codec --------------------------------------------- */
-
-/* Only for internal use in _codecsmodule.c */
-PyObject *_PyUnicode_DecodeUnicodeInternal(
- const char *string,
- Py_ssize_t length,
- const char *errors
- );
-
/* --- Latin-1 Codecs ----------------------------------------------------- */
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
diff --git a/Lib/encodings/unicode_internal.py b/Lib/encodings/unicode_internal.py
deleted file mode 100644
index df3e775..0000000
--- a/Lib/encodings/unicode_internal.py
+++ /dev/null
@@ -1,45 +0,0 @@
-""" Python 'unicode-internal' Codec
-
-
-Written by Marc-Andre Lemburg (mal@lemburg.com).
-
-(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
-
-"""
-import codecs
-
-### Codec APIs
-
-class Codec(codecs.Codec):
-
- # Note: Binding these as C functions will result in the class not
- # converting them to methods. This is intended.
- encode = codecs.unicode_internal_encode
- decode = codecs.unicode_internal_decode
-
-class IncrementalEncoder(codecs.IncrementalEncoder):
- def encode(self, input, final=False):
- return codecs.unicode_internal_encode(input, self.errors)[0]
-
-class IncrementalDecoder(codecs.IncrementalDecoder):
- def decode(self, input, final=False):
- return codecs.unicode_internal_decode(input, self.errors)[0]
-
-class StreamWriter(Codec,codecs.StreamWriter):
- pass
-
-class StreamReader(Codec,codecs.StreamReader):
- pass
-
-### encodings module API
-
-def getregentry():
- return codecs.CodecInfo(
- name='unicode-internal',
- encode=Codec.encode,
- decode=Codec.decode,
- incrementalencoder=IncrementalEncoder,
- incrementaldecoder=IncrementalDecoder,
- streamwriter=StreamWriter,
- streamreader=StreamReader,
- )
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index e2e7463..585992b 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase):
charmap[ord("?")] = "XYZ" # wrong type in mapping
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
- def test_decodeunicodeinternal(self):
- with test.support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- self.assertRaises(
- UnicodeDecodeError,
- b"\x00\x00\x00\x00\x00".decode,
- "unicode-internal",
- )
- if len('\0'.encode('unicode-internal')) == 4:
- def handler_unicodeinternal(exc):
- if not isinstance(exc, UnicodeDecodeError):
- raise TypeError("don't know how to handle %r" % exc)
- return ("\x01", 1)
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
- "\u0000"
- )
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
- "\u0000\ufffd"
- )
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
- "\u0000\\x00"
- )
-
- codecs.register_error("test.hui", handler_unicodeinternal)
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
- "\u0000\u0001\u0000"
- )
-
def test_callbacks(self):
def handler1(exc):
r = range(exc.start, exc.end)
@@ -794,16 +758,13 @@ class CodecCallbackTest(unittest.TestCase):
("ascii", b"\xff"),
("utf-8", b"\xff"),
("utf-7", b"+x-"),
- ("unicode-internal", b"\x00"),
):
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- self.assertRaises(
- TypeError,
- bytes.decode,
- enc,
- "test.badhandler"
- )
+ self.assertRaises(
+ TypeError,
+ bytes.decode,
+ enc,
+ "test.badhandler"
+ )
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase):
("utf-32", b"\xff"),
("unicode-escape", b"\\u123g"),
("raw-unicode-escape", b"\\u123g"),
- ("unicode-internal", b"\xff"),
]
def replacing(exc):
@@ -1024,11 +984,9 @@ class CodecCallbackTest(unittest.TestCase):
raise TypeError("don't know how to handle %r" % exc)
codecs.register_error("test.replacing", replacing)
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- for (encoding, data) in baddata:
- with self.assertRaises(TypeError):
- data.decode(encoding, "test.replacing")
+ for (encoding, data) in baddata:
+ with self.assertRaises(TypeError):
+ data.decode(encoding, "test.replacing")
def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
@@ -1039,10 +997,8 @@ class CodecCallbackTest(unittest.TestCase):
codecs.register_error("test.mutating", mutating)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- for (encoding, data) in baddata:
- self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ for (encoding, data) in baddata:
+ self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
# issue32583
def test_crashing_decode_handler(self):
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 893212e..e8c7d76 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase):
self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
-class RecodingTest(unittest.TestCase):
- def test_recoding(self):
- f = io.BytesIO()
- with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
- f2.write("a")
- # Python used to crash on this at exit because of a refcount
- # bug in _codecsmodule.c
-
- self.assertTrue(f.closed)
-
# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
@@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase):
self.assertEqual(uni, puny.decode("punycode"))
-class UnicodeInternalTest(unittest.TestCase):
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_bug1251300(self):
- # Decoding with unicode_internal used to not correctly handle "code
- # points" above 0x10ffff on UCS-4 builds.
- ok = [
- (b"\x00\x10\xff\xff", "\U0010ffff"),
- (b"\x00\x00\x01\x01", "\U00000101"),
- (b"", ""),
- ]
- not_ok = [
- b"\x7f\xff\xff\xff",
- b"\x80\x00\x00\x00",
- b"\x81\x00\x00\x00",
- b"\x00",
- b"\x00\x00\x00\x00\x00",
- ]
- for internal, uni in ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
- with support.check_warnings():
- self.assertEqual(uni, internal.decode("unicode_internal"))
- for internal in not_ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- self.assertRaises(UnicodeDecodeError, internal.decode,
- "unicode_internal")
- if sys.byteorder == "little":
- invalid = b"\x00\x00\x11\x00"
- invalid_backslashreplace = r"\x00\x00\x11\x00"
- else:
- invalid = b"\x00\x11\x00\x00"
- invalid_backslashreplace = r"\x00\x11\x00\x00"
- with support.check_warnings():
- self.assertRaises(UnicodeDecodeError,
- invalid.decode, "unicode_internal")
- with support.check_warnings():
- self.assertEqual(invalid.decode("unicode_internal", "replace"),
- '\ufffd')
- with support.check_warnings():
- self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
- invalid_backslashreplace)
-
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_decode_error_attributes(self):
- try:
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
- except UnicodeDecodeError as ex:
- self.assertEqual("unicode_internal", ex.encoding)
- self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
- self.assertEqual(4, ex.start)
- self.assertEqual(8, ex.end)
- else:
- self.fail()
-
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_decode_callback(self):
- codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
- decoder = codecs.getdecoder("unicode_internal")
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- ab = "ab".encode("unicode_internal").decode()
- ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
- "ascii"),
- "UnicodeInternalTest")
- self.assertEqual(("ab", 12), ignored)
-
- def test_encode_length(self):
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- # Issue 3739
- encoder = codecs.getencoder("unicode_internal")
- self.assertEqual(encoder("a")[1], 1)
- self.assertEqual(encoder("\xe9\u0142")[1], 2)
-
- self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
-
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
# 3.1 Map to nothing.
@@ -1949,7 +1858,6 @@ all_unicode_encodings = [
"shift_jisx0213",
"tis_620",
"unicode_escape",
- "unicode_internal",
"utf_16",
"utf_16_be",
"utf_16_le",
@@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"):
# The following encodings don't work in stateful mode
broken_unicode_with_stateful = [
"punycode",
- "unicode_internal"
]
@@ -1984,12 +1891,10 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
name = "latin_1"
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
- with support.check_warnings():
- # unicode-internal has been deprecated
- (b, size) = codecs.getencoder(encoding)(s)
- self.assertEqual(size, len(s), "encoding=%r" % encoding)
- (chars, size) = codecs.getdecoder(encoding)(b)
- self.assertEqual(chars, s, "encoding=%r" % encoding)
+ (b, size) = codecs.getencoder(encoding)(s)
+ self.assertEqual(size, len(s), "encoding=%r" % encoding)
+ (chars, size) = codecs.getdecoder(encoding)(b)
+ self.assertEqual(chars, s, "encoding=%r" % encoding)
if encoding not in broken_unicode_with_stateful:
# check stream reader/writer
@@ -2116,9 +2021,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
- with support.check_warnings():
- # unicode-internal has been deprecated
- self.assertRaises(TypeError, encoder)
+ self.assertRaises(TypeError, encoder)
def test_encoding_map_type_initialized(self):
from encodings import cp1140
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c277e70..1131efd 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2104,12 +2104,8 @@ class UnicodeTest(string_tests.CommonTest,
u = chr(c)
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
'utf-16-be', 'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
- with warnings.catch_warnings():
- # unicode-internal has been deprecated
- warnings.simplefilter("ignore", DeprecationWarning)
-
- self.assertEqual(str(u.encode(encoding),encoding), u)
+ 'unicode_escape'):
+ self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars)
for c in range(256):
@@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest,
# Roundtrip safety for non-BMP (just a few chars)
with warnings.catch_warnings():
- # unicode-internal has been deprecated
- warnings.simplefilter("ignore", DeprecationWarning)
-
u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
- 'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
+ 'raw_unicode_escape', 'unicode_escape'):
self.assertEqual(str(u.encode(encoding),encoding), u)
# UTF-8 must be roundtrip safe for all code points
@@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(args[0], text)
self.assertEqual(len(args), 1)
+ @support.cpython_only
def test_resize(self):
+ from _testcapi import getargs_u
for length in range(1, 100, 7):
# generate a fresh string (refcount=1)
text = 'a' * length + 'b'
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- # fill wstr internal field
- abc = text.encode('unicode_internal')
- self.assertEqual(abc.decode('unicode_internal'), text)
-
- # resize text: wstr field must be cleared and then recomputed
- text += 'c'
- abcdef = text.encode('unicode_internal')
- self.assertNotEqual(abc, abcdef)
- self.assertEqual(abcdef.decode('unicode_internal'), text)
+ # fill wstr internal field
+ abc = getargs_u(text)
+ self.assertEqual(abc, text)
+
+ # resize text: wstr field must be cleared and then recomputed
+ text += 'c'
+ abcdef = getargs_u(text)
+ self.assertNotEqual(abc, abcdef)
+ self.assertEqual(abcdef, text)
def test_compare(self):
# Issue #17615
diff --git a/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst b/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
new file mode 100644
index 0000000..f633fee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
@@ -0,0 +1,2 @@
+The "unicode_internal" codec has been removed. It had been deprecated since Python 3.3.
+Patch by Inada Naoki.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index e0d6902..90b3e37 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -21,8 +21,7 @@
(Unicode object, bytes consumed)
These <encoding>s are available: utf_8, unicode_escape,
- raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
- mbcs (on win32).
+ raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
Written by Marc-Andre Lemburg (mal@lemburg.com).
@@ -251,38 +250,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
/* --- Decoder ------------------------------------------------------------ */
/*[clinic input]
-_codecs.unicode_internal_decode
- obj: object
- errors: str(accept={str, NoneType}) = NULL
- /
-[clinic start generated code]*/
-
-static PyObject *
-_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
- const char *errors)
-/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
-{
- if (PyUnicode_Check(obj)) {
- if (PyUnicode_READY(obj) < 0)
- return NULL;
- Py_INCREF(obj);
- return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
- }
- else {
- Py_buffer view;
- PyObject *result;
- if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
- return NULL;
-
- result = codec_tuple(
- _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
- view.len);
- PyBuffer_Release(&view);
- return result;
- }
-}
-
-/*[clinic input]
_codecs.utf_7_decode
data: Py_buffer
errors: str(accept={str, NoneType}) = NULL
@@ -687,51 +654,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
}
/*[clinic input]
-_codecs.unicode_internal_encode
- obj: object
- errors: str(accept={str, NoneType}) = NULL
- /
-[clinic start generated code]*/
-
-static PyObject *
-_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
- const char *errors)
-/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
-{
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "unicode_internal codec has been deprecated",
- 1))
- return NULL;
-
- if (PyUnicode_Check(obj)) {
- Py_UNICODE *u;
- Py_ssize_t len, size;
-
- if (PyUnicode_READY(obj) < 0)
- return NULL;
-
- u = PyUnicode_AsUnicodeAndSize(obj, &len);
- if (u == NULL)
- return NULL;
- if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
- return PyErr_NoMemory();
- size = len * sizeof(Py_UNICODE);
- return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
- PyUnicode_GET_LENGTH(obj));
- }
- else {
- Py_buffer view;
- PyObject *result;
- if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
- return NULL;
- result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
- view.len);
- PyBuffer_Release(&view);
- return result;
- }
-}
-
-/*[clinic input]
_codecs.utf_7_encode
str: unicode
errors: str(accept={str, NoneType}) = NULL
@@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_UTF_32_EX_DECODE_METHODDEF
_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
- _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
- _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
_CODECS_LATIN_1_ENCODE_METHODDEF
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index d1f4cf3..65e2483 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -370,57 +370,6 @@ exit:
return return_value;
}
-PyDoc_STRVAR(_codecs_unicode_internal_decode__doc__,
-"unicode_internal_decode($module, obj, errors=None, /)\n"
-"--\n"
-"\n");
-
-#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF \
- {"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__},
-
-static PyObject *
-_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
- const char *errors);
-
-static PyObject *
-_codecs_unicode_internal_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
-{
- PyObject *return_value = NULL;
- PyObject *obj;
- const char *errors = NULL;
-
- if (!_PyArg_CheckPositional("unicode_internal_decode", nargs, 1, 2)) {
- goto exit;
- }
- obj = args[0];
- if (nargs < 2) {
- goto skip_optional;
- }
- if (args[1] == Py_None) {
- errors = NULL;
- }
- else if (PyUnicode_Check(args[1])) {
- Py_ssize_t errors_length;
- errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
- if (errors == NULL) {
- goto exit;
- }
- if (strlen(errors) != (size_t)errors_length) {
- PyErr_SetString(PyExc_ValueError, "embedded null character");
- goto exit;
- }
- }
- else {
- _PyArg_BadArgument("unicode_internal_decode", 2, "str or None", args[1]);
- goto exit;
- }
-skip_optional:
- return_value = _codecs_unicode_internal_decode_impl(module, obj, errors);
-
-exit:
- return return_value;
-}
-
PyDoc_STRVAR(_codecs_utf_7_decode__doc__,
"utf_7_decode($module, data, errors=None, final=False, /)\n"
"--\n"
@@ -1853,57 +1802,6 @@ exit:
return return_value;
}
-PyDoc_STRVAR(_codecs_unicode_internal_encode__doc__,
-"unicode_internal_encode($module, obj, errors=None, /)\n"
-"--\n"
-"\n");
-
-#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF \
- {"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__},
-
-static PyObject *
-_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
- const char *errors);
-
-static PyObject *
-_codecs_unicode_internal_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
-{
- PyObject *return_value = NULL;
- PyObject *obj;
- const char *errors = NULL;
-
- if (!_PyArg_CheckPositional("unicode_internal_encode", nargs, 1, 2)) {
- goto exit;
- }
- obj = args[0];
- if (nargs < 2) {
- goto skip_optional;
- }
- if (args[1] == Py_None) {
- errors = NULL;
- }
- else if (PyUnicode_Check(args[1])) {
- Py_ssize_t errors_length;
- errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
- if (errors == NULL) {
- goto exit;
- }
- if (strlen(errors) != (size_t)errors_length) {
- PyErr_SetString(PyExc_ValueError, "embedded null character");
- goto exit;
- }
- }
- else {
- _PyArg_BadArgument("unicode_internal_encode", 2, "str or None", args[1]);
- goto exit;
- }
-skip_optional:
- return_value = _codecs_unicode_internal_encode_impl(module, obj, errors);
-
-exit:
- return return_value;
-}
-
PyDoc_STRVAR(_codecs_utf_7_encode__doc__,
"utf_7_encode($module, str, errors=None, /)\n"
"--\n"
@@ -3024,4 +2922,4 @@ exit:
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=02bd0f0cf9a28150 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=da3c47709a55a05e input=a9049054013a1b77]*/
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8141ce7..b3a851a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
return result;
}
-/* --- Unicode Internal Codec ------------------------------------------- */
-
-PyObject *
-_PyUnicode_DecodeUnicodeInternal(const char *s,
- Py_ssize_t size,
- const char *errors)
-{
- const char *starts = s;
- Py_ssize_t startinpos;
- Py_ssize_t endinpos;
- _PyUnicodeWriter writer;
- const char *end;
- const char *reason;
- PyObject *errorHandler = NULL;
- PyObject *exc = NULL;
-
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "unicode_internal codec has been deprecated",
- 1))
- return NULL;
-
- if (size < 0) {
- PyErr_BadInternalCall();
- return NULL;
- }
- if (size == 0)
- _Py_RETURN_UNICODE_EMPTY();
-
- _PyUnicodeWriter_Init(&writer);
- if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
- PyErr_NoMemory();
- goto onError;
- }
- writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
-
- end = s + size;
- while (s < end) {
- Py_UNICODE uch;
- Py_UCS4 ch;
- if (end - s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- goto error;
- }
- /* We copy the raw representation one byte at a time because the
- pointer may be unaligned (see test_codeccallbacks). */
- ((char *) &uch)[0] = s[0];
- ((char *) &uch)[1] = s[1];
-#ifdef Py_UNICODE_WIDE
- ((char *) &uch)[2] = s[2];
- ((char *) &uch)[3] = s[3];
-#endif
- ch = uch;
-#ifdef Py_UNICODE_WIDE
- /* We have to sanity check the raw data, otherwise doom looms for
- some malformed UCS-4 data. */
- if (ch > 0x10ffff) {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- goto error;
- }
-#endif
- s += Py_UNICODE_SIZE;
-#ifndef Py_UNICODE_WIDE
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
- {
- Py_UNICODE uch2;
- ((char *) &uch2)[0] = s[0];
- ((char *) &uch2)[1] = s[1];
- if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
- s += Py_UNICODE_SIZE;
- }
- }
-#endif
-
- if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
- goto onError;
- continue;
-
- error:
- startinpos = s - starts;
- if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &writer))
- goto onError;
- }
-
- Py_XDECREF(errorHandler);
- Py_XDECREF(exc);
- return _PyUnicodeWriter_Finish(&writer);
-
- onError:
- _PyUnicodeWriter_Dealloc(&writer);
- Py_XDECREF(errorHandler);
- Py_XDECREF(exc);
- return NULL;
-}
-
/* --- Latin-1 Codec ------------------------------------------------------ */
PyObject *
diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj
index 701b55f..ffb95c6 100644
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@@ -392,7 +392,6 @@
<Compile Include="encodings\tis_620.py" />
<Compile Include="encodings\undefined.py" />
<Compile Include="encodings\unicode_escape.py" />
- <Compile Include="encodings\unicode_internal.py" />
<Compile Include="encodings\utf_16.py" />
<Compile Include="encodings\utf_16_be.py" />
<Compile Include="encodings\utf_16_le.py" />