summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2004-12-29 16:04:38 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2004-12-29 16:04:38 (GMT)
commit ee1d24703ffc0ef91ca43a71fb691255e18e162e (patch)
tree 0a7a1bb9c72f075d22a6579000713ddb076f2f91
parent 6cea6933625910298c5fb156f365814eb3600494 (diff)
download cpython-ee1d24703ffc0ef91ca43a71fb691255e18e162e.zip
cpython-ee1d24703ffc0ef91ca43a71fb691255e18e162e.tar.gz
cpython-ee1d24703ffc0ef91ca43a71fb691255e18e162e.tar.bz2
Add a test that checks the basic functionality of every encoding.
-rw-r--r-- Lib/test/test_codecs.py 180
1 files changed, 179 insertions, 1 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 36c4040..99ed82d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -549,6 +549,182 @@ class StreamReaderTest(unittest.TestCase):
f = self.reader(self.stream)
self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
+all_unicode_encodings = [
+ "ascii",
+ "base64_codec",
+ "big5",
+ "big5hkscs",
+ "charmap",
+ "cp037",
+ "cp1006",
+ "cp1026",
+ "cp1140",
+ "cp1250",
+ "cp1251",
+ "cp1252",
+ "cp1253",
+ "cp1254",
+ "cp1255",
+ "cp1256",
+ "cp1257",
+ "cp1258",
+ "cp424",
+ "cp437",
+ "cp500",
+ "cp737",
+ "cp775",
+ "cp850",
+ "cp852",
+ "cp855",
+ "cp856",
+ "cp857",
+ "cp860",
+ "cp861",
+ "cp862",
+ "cp863",
+ "cp864",
+ "cp865",
+ "cp866",
+ "cp869",
+ "cp874",
+ "cp875",
+ "cp932",
+ "cp949",
+ "cp950",
+ "euc_jis_2004",
+ "euc_jisx0213",
+ "euc_jp",
+ "euc_kr",
+ "gb18030",
+ "gb2312",
+ "gbk",
+ "hex_codec",
+ "hp_roman8",
+ "hz",
+ "idna",
+ "iso2022_jp",
+ "iso2022_jp_1",
+ "iso2022_jp_2",
+ "iso2022_jp_2004",
+ "iso2022_jp_3",
+ "iso2022_jp_ext",
+ "iso2022_kr",
+ "iso8859_1",
+ "iso8859_10",
+ "iso8859_11",
+ "iso8859_13",
+ "iso8859_14",
+ "iso8859_15",
+ "iso8859_16",
+ "iso8859_2",
+ "iso8859_3",
+ "iso8859_4",
+ "iso8859_5",
+ "iso8859_6",
+ "iso8859_7",
+ "iso8859_8",
+ "iso8859_9",
+ "johab",
+ "koi8_r",
+ "koi8_u",
+ "latin_1",
+ "mac_cyrillic",
+ "mac_greek",
+ "mac_iceland",
+ "mac_latin2",
+ "mac_roman",
+ "mac_turkish",
+ "palmos",
+ "ptcp154",
+ "punycode",
+ "raw_unicode_escape",
+ "rot_13",
+ "shift_jis",
+ "shift_jis_2004",
+ "shift_jisx0213",
+ "tis_620",
+ "unicode_escape",
+ "unicode_internal",
+ "utf_16",
+ "utf_16_be",
+ "utf_16_le",
+ "utf_7",
+ "utf_8",
+]
+
+if hasattr(codecs, "mbcs_encode"):
+ all_unicode_encodings.append("mbcs")
+
+# The following encodings work only with str, not unicode
+all_string_encodings = [
+ "quopri_codec",
+ "string_escape",
+ "uu_codec",
+]
+
+# The following encoding is not tested, because it's not supposed
+# to work:
+# "undefined"
+
+# The following encodings don't work in stateful mode
+broken_unicode_with_streams = [
+ "base64_codec",
+ "hex_codec",
+ "punycode",
+ "unicode_internal"
+]
+
+try:
+ import bz2
+except ImportError:
+ pass
+else:
+ all_unicode_encodings.append("bz2_codec")
+ broken_unicode_with_streams.append("bz2_codec")
+
+try:
+ import zlib
+except ImportError:
+ pass
+else:
+ all_unicode_encodings.append("zlib_codec")
+ broken_unicode_with_streams.append("zlib_codec")
+
+class BasicUnicodeTest(unittest.TestCase):
+ def test_basics(self):
+ s = u"abc123" # all codecs should be able to encode these
+ for encoding in all_unicode_encodings:
+ (bytes, size) = codecs.getencoder(encoding)(s)
+ if encoding != "unicode_internal":
+ self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
+ (chars, size) = codecs.getdecoder(encoding)(bytes)
+ self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
+ if encoding not in broken_unicode_with_streams:
+ # check stream reader/writer
+ q = Queue()
+ writer = codecs.getwriter(encoding)(q)
+ encodedresult = ""
+ for c in s:
+ writer.write(c)
+ encodedresult += q.read()
+ q = Queue()
+ reader = codecs.getreader(encoding)(q)
+ decodedresult = u""
+ for c in encodedresult:
+ q.write(c)
+ decodedresult += reader.read()
+ self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
+
+class BasicStrTest(unittest.TestCase):
+ def test_basics(self):
+ s = "abc123"
+ for encoding in all_string_encodings:
+ (bytes, size) = codecs.getencoder(encoding)(s)
+ self.assertEqual(size, len(s))
+ (chars, size) = codecs.getdecoder(encoding)(bytes)
+ self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
def test_main():
test_support.run_unittest(
UTF16Test,
@@ -561,7 +737,9 @@ def test_main():
NameprepTest,
CodecTest,
CodecsModuleTest,
- StreamReaderTest
+ StreamReaderTest,
+ BasicUnicodeTest,
+ BasicStrTest
)