summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/unicode_internal.py 45
-rw-r--r-- Lib/test/test_codeccallbacks.py 66
-rw-r--r-- Lib/test/test_codecs.py 107
-rw-r--r-- Lib/test/test_unicode.py 36
4 files changed, 30 insertions, 224 deletions
diff --git a/Lib/encodings/unicode_internal.py b/Lib/encodings/unicode_internal.py
deleted file mode 100644
index df3e775..0000000
--- a/Lib/encodings/unicode_internal.py
+++ /dev/null
@@ -1,45 +0,0 @@
-""" Python 'unicode-internal' Codec
-
-
-Written by Marc-Andre Lemburg (mal@lemburg.com).
-
-(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
-
-"""
-import codecs
-
-### Codec APIs
-
-class Codec(codecs.Codec):
-
- # Note: Binding these as C functions will result in the class not
- # converting them to methods. This is intended.
- encode = codecs.unicode_internal_encode
- decode = codecs.unicode_internal_decode
-
-class IncrementalEncoder(codecs.IncrementalEncoder):
- def encode(self, input, final=False):
- return codecs.unicode_internal_encode(input, self.errors)[0]
-
-class IncrementalDecoder(codecs.IncrementalDecoder):
- def decode(self, input, final=False):
- return codecs.unicode_internal_decode(input, self.errors)[0]
-
-class StreamWriter(Codec,codecs.StreamWriter):
- pass
-
-class StreamReader(Codec,codecs.StreamReader):
- pass
-
-### encodings module API
-
-def getregentry():
- return codecs.CodecInfo(
- name='unicode-internal',
- encode=Codec.encode,
- decode=Codec.decode,
- incrementalencoder=IncrementalEncoder,
- incrementaldecoder=IncrementalDecoder,
- streamwriter=StreamWriter,
- streamreader=StreamReader,
- )
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index e2e7463..585992b 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase):
charmap[ord("?")] = "XYZ" # wrong type in mapping
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
- def test_decodeunicodeinternal(self):
- with test.support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- self.assertRaises(
- UnicodeDecodeError,
- b"\x00\x00\x00\x00\x00".decode,
- "unicode-internal",
- )
- if len('\0'.encode('unicode-internal')) == 4:
- def handler_unicodeinternal(exc):
- if not isinstance(exc, UnicodeDecodeError):
- raise TypeError("don't know how to handle %r" % exc)
- return ("\x01", 1)
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
- "\u0000"
- )
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
- "\u0000\ufffd"
- )
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
- "\u0000\\x00"
- )
-
- codecs.register_error("test.hui", handler_unicodeinternal)
-
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
- "\u0000\u0001\u0000"
- )
-
def test_callbacks(self):
def handler1(exc):
r = range(exc.start, exc.end)
@@ -794,16 +758,13 @@ class CodecCallbackTest(unittest.TestCase):
("ascii", b"\xff"),
("utf-8", b"\xff"),
("utf-7", b"+x-"),
- ("unicode-internal", b"\x00"),
):
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- self.assertRaises(
- TypeError,
- bytes.decode,
- enc,
- "test.badhandler"
- )
+ self.assertRaises(
+ TypeError,
+ bytes.decode,
+ enc,
+ "test.badhandler"
+ )
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase):
("utf-32", b"\xff"),
("unicode-escape", b"\\u123g"),
("raw-unicode-escape", b"\\u123g"),
- ("unicode-internal", b"\xff"),
]
def replacing(exc):
@@ -1024,11 +984,9 @@ class CodecCallbackTest(unittest.TestCase):
raise TypeError("don't know how to handle %r" % exc)
codecs.register_error("test.replacing", replacing)
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- for (encoding, data) in baddata:
- with self.assertRaises(TypeError):
- data.decode(encoding, "test.replacing")
+ for (encoding, data) in baddata:
+ with self.assertRaises(TypeError):
+ data.decode(encoding, "test.replacing")
def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
@@ -1039,10 +997,8 @@ class CodecCallbackTest(unittest.TestCase):
codecs.register_error("test.mutating", mutating)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
- with test.support.check_warnings():
- # unicode-internal has been deprecated
- for (encoding, data) in baddata:
- self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ for (encoding, data) in baddata:
+ self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
# issue32583
def test_crashing_decode_handler(self):
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 893212e..e8c7d76 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase):
self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
-class RecodingTest(unittest.TestCase):
- def test_recoding(self):
- f = io.BytesIO()
- with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
- f2.write("a")
- # Python used to crash on this at exit because of a refcount
- # bug in _codecsmodule.c
-
- self.assertTrue(f.closed)
-
# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
@@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase):
self.assertEqual(uni, puny.decode("punycode"))
-class UnicodeInternalTest(unittest.TestCase):
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_bug1251300(self):
- # Decoding with unicode_internal used to not correctly handle "code
- # points" above 0x10ffff on UCS-4 builds.
- ok = [
- (b"\x00\x10\xff\xff", "\U0010ffff"),
- (b"\x00\x00\x01\x01", "\U00000101"),
- (b"", ""),
- ]
- not_ok = [
- b"\x7f\xff\xff\xff",
- b"\x80\x00\x00\x00",
- b"\x81\x00\x00\x00",
- b"\x00",
- b"\x00\x00\x00\x00\x00",
- ]
- for internal, uni in ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
- with support.check_warnings():
- self.assertEqual(uni, internal.decode("unicode_internal"))
- for internal in not_ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- self.assertRaises(UnicodeDecodeError, internal.decode,
- "unicode_internal")
- if sys.byteorder == "little":
- invalid = b"\x00\x00\x11\x00"
- invalid_backslashreplace = r"\x00\x00\x11\x00"
- else:
- invalid = b"\x00\x11\x00\x00"
- invalid_backslashreplace = r"\x00\x11\x00\x00"
- with support.check_warnings():
- self.assertRaises(UnicodeDecodeError,
- invalid.decode, "unicode_internal")
- with support.check_warnings():
- self.assertEqual(invalid.decode("unicode_internal", "replace"),
- '\ufffd')
- with support.check_warnings():
- self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
- invalid_backslashreplace)
-
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_decode_error_attributes(self):
- try:
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
- except UnicodeDecodeError as ex:
- self.assertEqual("unicode_internal", ex.encoding)
- self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
- self.assertEqual(4, ex.start)
- self.assertEqual(8, ex.end)
- else:
- self.fail()
-
- @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
- def test_decode_callback(self):
- codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
- decoder = codecs.getdecoder("unicode_internal")
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- ab = "ab".encode("unicode_internal").decode()
- ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
- "ascii"),
- "UnicodeInternalTest")
- self.assertEqual(("ab", 12), ignored)
-
- def test_encode_length(self):
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- # Issue 3739
- encoder = codecs.getencoder("unicode_internal")
- self.assertEqual(encoder("a")[1], 1)
- self.assertEqual(encoder("\xe9\u0142")[1], 2)
-
- self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
-
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
# 3.1 Map to nothing.
@@ -1949,7 +1858,6 @@ all_unicode_encodings = [
"shift_jisx0213",
"tis_620",
"unicode_escape",
- "unicode_internal",
"utf_16",
"utf_16_be",
"utf_16_le",
@@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"):
# The following encodings don't work in stateful mode
broken_unicode_with_stateful = [
"punycode",
- "unicode_internal"
]
@@ -1984,12 +1891,10 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
name = "latin_1"
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
- with support.check_warnings():
- # unicode-internal has been deprecated
- (b, size) = codecs.getencoder(encoding)(s)
- self.assertEqual(size, len(s), "encoding=%r" % encoding)
- (chars, size) = codecs.getdecoder(encoding)(b)
- self.assertEqual(chars, s, "encoding=%r" % encoding)
+ (b, size) = codecs.getencoder(encoding)(s)
+ self.assertEqual(size, len(s), "encoding=%r" % encoding)
+ (chars, size) = codecs.getdecoder(encoding)(b)
+ self.assertEqual(chars, s, "encoding=%r" % encoding)
if encoding not in broken_unicode_with_stateful:
# check stream reader/writer
@@ -2116,9 +2021,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
- with support.check_warnings():
- # unicode-internal has been deprecated
- self.assertRaises(TypeError, encoder)
+ self.assertRaises(TypeError, encoder)
def test_encoding_map_type_initialized(self):
from encodings import cp1140
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c277e70..1131efd 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2104,12 +2104,8 @@ class UnicodeTest(string_tests.CommonTest,
u = chr(c)
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
'utf-16-be', 'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
- with warnings.catch_warnings():
- # unicode-internal has been deprecated
- warnings.simplefilter("ignore", DeprecationWarning)
-
- self.assertEqual(str(u.encode(encoding),encoding), u)
+ 'unicode_escape'):
+ self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars)
for c in range(256):
@@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest,
# Roundtrip safety for non-BMP (just a few chars)
with warnings.catch_warnings():
- # unicode-internal has been deprecated
- warnings.simplefilter("ignore", DeprecationWarning)
-
u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
- 'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
+ 'raw_unicode_escape', 'unicode_escape'):
self.assertEqual(str(u.encode(encoding),encoding), u)
# UTF-8 must be roundtrip safe for all code points
@@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(args[0], text)
self.assertEqual(len(args), 1)
+ @support.cpython_only
def test_resize(self):
+ from _testcapi import getargs_u
for length in range(1, 100, 7):
# generate a fresh string (refcount=1)
text = 'a' * length + 'b'
- with support.check_warnings(('unicode_internal codec has been '
- 'deprecated', DeprecationWarning)):
- # fill wstr internal field
- abc = text.encode('unicode_internal')
- self.assertEqual(abc.decode('unicode_internal'), text)
-
- # resize text: wstr field must be cleared and then recomputed
- text += 'c'
- abcdef = text.encode('unicode_internal')
- self.assertNotEqual(abc, abcdef)
- self.assertEqual(abcdef.decode('unicode_internal'), text)
+ # fill wstr internal field
+ abc = getargs_u(text)
+ self.assertEqual(abc, text)
+
+ # resize text: wstr field must be cleared and then recomputed
+ text += 'c'
+ abcdef = getargs_u(text)
+ self.assertNotEqual(abc, abcdef)
+ self.assertEqual(abcdef, text)
def test_compare(self):
# Issue #17615