diff options
author | Steve Dower <steve.dower@microsoft.com> | 2016-09-07 02:42:27 (GMT) |
---|---|---|
committer | Steve Dower <steve.dower@microsoft.com> | 2016-09-07 02:42:27 (GMT) |
commit | f5aba58480bb0dd45181f609487ac2ecfcc98673 (patch) | |
tree | 0d70301224f945e379c37058fa6b24b6ba8c78e5 /Lib | |
parent | 22d0698d3b034f4f4314aa793da7225a5da640ba (diff) | |
download | cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.zip cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.gz cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.bz2 |
Issue #27959: Adds oem encoding, aliases ansi to mbcs, and moves aliasmbcs to codec lookup
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/__init__.py | 10 | ||||
-rw-r--r-- | Lib/encodings/aliases.py | 1 | ||||
-rw-r--r-- | Lib/encodings/oem.py | 41 | ||||
-rw-r--r-- | Lib/site.py | 16 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 62 |
5 files changed, 81 insertions, 49 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 320011b..9a9b90b 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com). """#" import codecs +import sys from . import aliases _cache = {} @@ -151,3 +152,12 @@ def search_function(encoding): # Register the search_function in the Python codec registry codecs.register(search_function) + +if sys.platform == 'win32': + def _alias_mbcs(encoding): + import _bootlocale + if encoding == _bootlocale.getpreferredencoding(False): + import encodings.mbcs + return encodings.mbcs.getregentry() + + codecs.register(_alias_mbcs) diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index 67c828d..2e63c2f 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -458,6 +458,7 @@ aliases = { 'macturkish' : 'mac_turkish', # mbcs codec + 'ansi' : 'mbcs', 'dbcs' : 'mbcs', # ptcp154 codec diff --git a/Lib/encodings/oem.py b/Lib/encodings/oem.py new file mode 100644 index 0000000..2c3426b --- /dev/null +++ b/Lib/encodings/oem.py @@ -0,0 +1,41 @@ +""" Python 'oem' Codec for Windows + +""" +# Import them explicitly to cause an ImportError +# on non-Windows systems +from codecs import oem_encode, oem_decode +# for IncrementalDecoder, IncrementalEncoder, ... 
+import codecs + +### Codec APIs + +encode = oem_encode + +def decode(input, errors='strict'): + return oem_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return oem_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = oem_decode + +class StreamWriter(codecs.StreamWriter): + encode = oem_encode + +class StreamReader(codecs.StreamReader): + decode = oem_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='oem', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/site.py b/Lib/site.py index a84e3bb..a536ef1 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -423,21 +423,6 @@ def enablerlcompleter(): sys.__interactivehook__ = register_readline -def aliasmbcs(): - """On Windows, some default encodings are not provided by Python, - while they are always available as "mbcs" in each locale. Make - them usable by aliasing to "mbcs" in such a case.""" - if sys.platform == 'win32': - import _bootlocale, codecs - enc = _bootlocale.getpreferredencoding(False) - if enc.startswith('cp'): # "cp***" ? 
- try: - codecs.lookup(enc) - except LookupError: - import encodings - encodings._cache[enc] = encodings._unknown - encodings.aliases.aliases[enc] = 'mbcs' - CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$' def venv(known_paths): @@ -560,7 +545,6 @@ def main(): setcopyright() sethelper() enablerlcompleter() - aliasmbcs() execsitecustomize() if ENABLE_USER_SITE: execusercustomize() diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d875340..825a7dd 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -8,11 +8,6 @@ import encodings from test import support -if sys.platform == 'win32': - VISTA_OR_LATER = (sys.getwindowsversion().major >= 6) -else: - VISTA_OR_LATER = False - try: import ctypes except ImportError: @@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.TestCase): ('abc', 'strict', b'abc'), ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'), ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'), + ('\udc80', 'strict', None), + ('\udc80', 'ignore', b''), + ('\udc80', 'replace', b'?'), + ('\udc80', 'backslashreplace', b'\\udc80'), + ('\udc80', 'namereplace', b'\\udc80'), + ('\udc80', 'surrogatepass', b'\xed\xb2\x80'), ] - if VISTA_OR_LATER: - tests.extend(( - ('\udc80', 'strict', None), - ('\udc80', 'ignore', b''), - ('\udc80', 'replace', b'?'), - ('\udc80', 'backslashreplace', b'\\udc80'), - ('\udc80', 'namereplace', b'\\udc80'), - ('\udc80', 'surrogatepass', b'\xed\xb2\x80'), - )) - else: - tests.append(('\udc80', 'strict', b'\xed\xb2\x80')) for text, errors, expected in tests: if expected is not None: try: @@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.TestCase): (b'[\xff]', 'ignore', '[]'), (b'[\xff]', 'replace', '[\ufffd]'), (b'[\xff]', 'surrogateescape', '[\udcff]'), + (b'[\xed\xb2\x80]', 'strict', None), + (b'[\xed\xb2\x80]', 'ignore', '[]'), + (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'), ] - if VISTA_OR_LATER: - tests.extend(( - (b'[\xed\xb2\x80]', 'strict', None), - 
(b'[\xed\xb2\x80]', 'ignore', '[]'), - (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'), - )) - else: - tests.extend(( - (b'[\xed\xb2\x80]', 'strict', '[\udc80]'), - )) for raw, errors, expected in tests: if expected is not None: try: @@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.TestCase): self.assertRaises(UnicodeDecodeError, raw.decode, 'cp65001', errors) - @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') def test_lone_surrogates(self): self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001") self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001") @@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.TestCase): self.assertEqual("[\uDC80]".encode("cp65001", "replace"), b'[?]') - @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') def test_surrogatepass_handler(self): self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"), b"abc\xed\xa0\x80def") @@ -1951,6 +1932,8 @@ all_unicode_encodings = [ if hasattr(codecs, "mbcs_encode"): all_unicode_encodings.append("mbcs") +if hasattr(codecs, "oem_encode"): + all_unicode_encodings.append("oem") # The following encoding is not tested, because it's not supposed # to work: @@ -3119,11 +3102,10 @@ class CodePageTest(unittest.TestCase): (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'), (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'), )) - if VISTA_OR_LATER: - self.check_encode(self.CP_UTF8, ( - ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'), - ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), - )) + self.check_encode(self.CP_UTF8, ( + ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'), + ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), + )) def test_incremental(self): decoded = codecs.code_page_decode(932, b'\x82', 'strict', False) @@ -3144,6 +3126,20 @@ class CodePageTest(unittest.TestCase): False) self.assertEqual(decoded, ('abc', 3)) + def test_mbcs_alias(self): + # Check that looking 
up our 'default' codepage will return + # mbcs when we don't have a more specific one available + import _bootlocale + def _get_fake_codepage(*a): + return 'cp123' + old_getpreferredencoding = _bootlocale.getpreferredencoding + _bootlocale.getpreferredencoding = _get_fake_codepage + try: + codec = codecs.lookup('cp123') + self.assertEqual(codec.name, 'mbcs') + finally: + _bootlocale.getpreferredencoding = old_getpreferredencoding + class ASCIITest(unittest.TestCase): def test_encode(self): |