Issue #27959: Add oem encoding, alias ansi to mbcs, move aliasmbcs to codec lookup

author: Steve Dower <steve.dower@microsoft.com> 2016-09-07 02:42:27 (GMT)
committer: Steve Dower <steve.dower@microsoft.com> 2016-09-07 02:42:27 (GMT)
commit: f5aba58480bb0dd45181f609487ac2ecfcc98673 (patch)
tree: 0d70301224f945e379c37058fa6b24b6ba8c78e5 /Lib
parent: 22d0698d3b034f4f4314aa793da7225a5da640ba (diff)
download: cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.zip
cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.gz
cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.bz2
5 files changed, 81 insertions, 49 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 320011b..9a9b90b 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 """#"
 
 import codecs
+import sys
 from . import aliases
 
 _cache = {}
@@ -151,3 +152,12 @@ def search_function(encoding):
 
 # Register the search_function in the Python codec registry
 codecs.register(search_function)
+
+if sys.platform == 'win32':
+    def _alias_mbcs(encoding):
+        import _bootlocale
+        if encoding == _bootlocale.getpreferredencoding(False):
+            import encodings.mbcs
+            return encodings.mbcs.getregentry()
+
+    codecs.register(_alias_mbcs)
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 67c828d..2e63c2f 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -458,6 +458,7 @@ aliases = {
     'macturkish'         : 'mac_turkish',
 
     # mbcs codec
+    'ansi'               : 'mbcs',
     'dbcs'               : 'mbcs',
 
     # ptcp154 codec
diff --git a/Lib/encodings/oem.py b/Lib/encodings/oem.py
new file mode 100644
index 0000000..2c3426b
--- /dev/null
+++ b/Lib/encodings/oem.py
@@ -0,0 +1,41 @@
+""" Python 'oem' Codec for Windows
+
+"""
+# Import them explicitly to cause an ImportError
+# on non-Windows systems
+from codecs import oem_encode, oem_decode
+# for IncrementalDecoder, IncrementalEncoder, ...
+import codecs
+
+### Codec APIs
+
+encode = oem_encode
+
+def decode(input, errors='strict'):
+    return oem_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return oem_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    _buffer_decode = oem_decode
+
+class StreamWriter(codecs.StreamWriter):
+    encode = oem_encode
+
+class StreamReader(codecs.StreamReader):
+    decode = oem_decode
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='oem',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
diff --git a/Lib/site.py b/Lib/site.py
index a84e3bb..a536ef1 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -423,21 +423,6 @@ def enablerlcompleter():
 
     sys.__interactivehook__ = register_readline
 
-def aliasmbcs():
-    """On Windows, some default encodings are not provided by Python,
-    while they are always available as "mbcs" in each locale. Make
-    them usable by aliasing to "mbcs" in such a case."""
-    if sys.platform == 'win32':
-        import _bootlocale, codecs
-        enc = _bootlocale.getpreferredencoding(False)
-        if enc.startswith('cp'):            # "cp***" ?
-            try:
-                codecs.lookup(enc)
-            except LookupError:
-                import encodings
-                encodings._cache[enc] = encodings._unknown
-                encodings.aliases.aliases[enc] = 'mbcs'
-
 CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$'
 
 def venv(known_paths):
@@ -560,7 +545,6 @@ def main():
     setcopyright()
     sethelper()
     enablerlcompleter()
-    aliasmbcs()
     execsitecustomize()
     if ENABLE_USER_SITE:
         execusercustomize()
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d875340..825a7dd 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -8,11 +8,6 @@ import encodings
 
 from test import support
 
-if sys.platform == 'win32':
-    VISTA_OR_LATER = (sys.getwindowsversion().major >= 6)
-else:
-    VISTA_OR_LATER = False
-
 try:
     import ctypes
 except ImportError:
@@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.TestCase):
             ('abc', 'strict', b'abc'),
             ('\xe9\u20ac', 'strict',  b'\xc3\xa9\xe2\x82\xac'),
             ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+            ('\udc80', 'strict', None),
+            ('\udc80', 'ignore', b''),
+            ('\udc80', 'replace', b'?'),
+            ('\udc80', 'backslashreplace', b'\\udc80'),
+            ('\udc80', 'namereplace', b'\\udc80'),
+            ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
         ]
-        if VISTA_OR_LATER:
-            tests.extend((
-                ('\udc80', 'strict', None),
-                ('\udc80', 'ignore', b''),
-                ('\udc80', 'replace', b'?'),
-                ('\udc80', 'backslashreplace', b'\\udc80'),
-                ('\udc80', 'namereplace', b'\\udc80'),
-                ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
-            ))
-        else:
-            tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
         for text, errors, expected in tests:
             if expected is not None:
                 try:
@@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.TestCase):
             (b'[\xff]', 'ignore', '[]'),
             (b'[\xff]', 'replace', '[\ufffd]'),
             (b'[\xff]', 'surrogateescape', '[\udcff]'),
+            (b'[\xed\xb2\x80]', 'strict', None),
+            (b'[\xed\xb2\x80]', 'ignore', '[]'),
+            (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
         ]
-        if VISTA_OR_LATER:
-            tests.extend((
-                (b'[\xed\xb2\x80]', 'strict', None),
-                (b'[\xed\xb2\x80]', 'ignore', '[]'),
-                (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
-            ))
-        else:
-            tests.extend((
-                (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
-            ))
         for raw, errors, expected in tests:
             if expected is not None:
                 try:
@@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
                 self.assertRaises(UnicodeDecodeError,
                     raw.decode, 'cp65001', errors)
 
-    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
     def test_lone_surrogates(self):
         self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
         self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
@@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
         self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
                          b'[?]')
 
-    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
     def test_surrogatepass_handler(self):
         self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
                          b"abc\xed\xa0\x80def")
@@ -1951,6 +1932,8 @@ all_unicode_encodings = [
 
 if hasattr(codecs, "mbcs_encode"):
     all_unicode_encodings.append("mbcs")
+if hasattr(codecs, "oem_encode"):
+    all_unicode_encodings.append("oem")
 
 # The following encoding is not tested, because it's not supposed
 # to work:
@@ -3119,11 +3102,10 @@ class CodePageTest(unittest.TestCase):
             (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
             (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
         ))
-        if VISTA_OR_LATER:
-            self.check_encode(self.CP_UTF8, (
-                ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
-                ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
-            ))
+        self.check_encode(self.CP_UTF8, (
+            ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
+            ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
+        ))
 
     def test_incremental(self):
         decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
@@ -3144,6 +3126,20 @@ class CodePageTest(unittest.TestCase):
                                           False)
         self.assertEqual(decoded, ('abc', 3))
 
+    def test_mbcs_alias(self):
+        # Check that looking up our 'default' codepage will return
+        # mbcs when we don't have a more specific one available
+        import _bootlocale
+        def _get_fake_codepage(*a):
+            return 'cp123'
+        old_getpreferredencoding = _bootlocale.getpreferredencoding
+        _bootlocale.getpreferredencoding = _get_fake_codepage
+        try:
+            codec = codecs.lookup('cp123')
+            self.assertEqual(codec.name, 'mbcs')
+        finally:
+            _bootlocale.getpreferredencoding = old_getpreferredencoding
+
 
 class ASCIITest(unittest.TestCase):
     def test_encode(self):
author	Steve Dower <steve.dower@microsoft.com>	2016-09-07 02:42:27 (GMT)
committer	Steve Dower <steve.dower@microsoft.com>	2016-09-07 02:42:27 (GMT)
commit	f5aba58480bb0dd45181f609487ac2ecfcc98673 (patch)
tree	0d70301224f945e379c37058fa6b24b6ba8c78e5 /Lib
parent	22d0698d3b034f4f4314aa793da7225a5da640ba (diff)
download	cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.zip cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.gz cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.bz2