summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Steve Dower <steve.dower@microsoft.com> 2016-09-07 02:42:27 (GMT)
committer: Steve Dower <steve.dower@microsoft.com> 2016-09-07 02:42:27 (GMT)
commit: f5aba58480bb0dd45181f609487ac2ecfcc98673 (patch)
tree: 0d70301224f945e379c37058fa6b24b6ba8c78e5 /Lib
parent: 22d0698d3b034f4f4314aa793da7225a5da640ba (diff)
downloadcpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.zip
cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.gz
cpython-f5aba58480bb0dd45181f609487ac2ecfcc98673.tar.bz2
Issue #27959: Adds oem encoding, aliases ansi to mbcs, and moves aliasmbcs to codec lookup
Diffstat (limited to 'Lib')
-rw-r--r--Lib/encodings/__init__.py10
-rw-r--r--Lib/encodings/aliases.py1
-rw-r--r--Lib/encodings/oem.py41
-rw-r--r--Lib/site.py16
-rw-r--r--Lib/test/test_codecs.py62
5 files changed, 81 insertions, 49 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 320011b..9a9b90b 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import codecs
+import sys
from . import aliases
_cache = {}
@@ -151,3 +152,12 @@ def search_function(encoding):
# Register the search_function in the Python codec registry
codecs.register(search_function)
+
+if sys.platform == 'win32':
+ def _alias_mbcs(encoding):
+ import _bootlocale
+ if encoding == _bootlocale.getpreferredencoding(False):
+ import encodings.mbcs
+ return encodings.mbcs.getregentry()
+
+ codecs.register(_alias_mbcs)
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 67c828d..2e63c2f 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -458,6 +458,7 @@ aliases = {
'macturkish' : 'mac_turkish',
# mbcs codec
+ 'ansi' : 'mbcs',
'dbcs' : 'mbcs',
# ptcp154 codec
diff --git a/Lib/encodings/oem.py b/Lib/encodings/oem.py
new file mode 100644
index 0000000..2c3426b
--- /dev/null
+++ b/Lib/encodings/oem.py
@@ -0,0 +1,41 @@
+""" Python 'oem' Codec for Windows
+
+"""
+# Import them explicitly to cause an ImportError
+# on non-Windows systems
+from codecs import oem_encode, oem_decode
+# for IncrementalDecoder, IncrementalEncoder, ...
+import codecs
+
+### Codec APIs
+
+encode = oem_encode
+
+def decode(input, errors='strict'):
+ return oem_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return oem_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ _buffer_decode = oem_decode
+
+class StreamWriter(codecs.StreamWriter):
+ encode = oem_encode
+
+class StreamReader(codecs.StreamReader):
+ decode = oem_decode
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='oem',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/Lib/site.py b/Lib/site.py
index a84e3bb..a536ef1 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -423,21 +423,6 @@ def enablerlcompleter():
sys.__interactivehook__ = register_readline
-def aliasmbcs():
- """On Windows, some default encodings are not provided by Python,
- while they are always available as "mbcs" in each locale. Make
- them usable by aliasing to "mbcs" in such a case."""
- if sys.platform == 'win32':
- import _bootlocale, codecs
- enc = _bootlocale.getpreferredencoding(False)
- if enc.startswith('cp'): # "cp***" ?
- try:
- codecs.lookup(enc)
- except LookupError:
- import encodings
- encodings._cache[enc] = encodings._unknown
- encodings.aliases.aliases[enc] = 'mbcs'
-
CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$'
def venv(known_paths):
@@ -560,7 +545,6 @@ def main():
setcopyright()
sethelper()
enablerlcompleter()
- aliasmbcs()
execsitecustomize()
if ENABLE_USER_SITE:
execusercustomize()
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d875340..825a7dd 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -8,11 +8,6 @@ import encodings
from test import support
-if sys.platform == 'win32':
- VISTA_OR_LATER = (sys.getwindowsversion().major >= 6)
-else:
- VISTA_OR_LATER = False
-
try:
import ctypes
except ImportError:
@@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.TestCase):
('abc', 'strict', b'abc'),
('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+ ('\udc80', 'strict', None),
+ ('\udc80', 'ignore', b''),
+ ('\udc80', 'replace', b'?'),
+ ('\udc80', 'backslashreplace', b'\\udc80'),
+ ('\udc80', 'namereplace', b'\\udc80'),
+ ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
]
- if VISTA_OR_LATER:
- tests.extend((
- ('\udc80', 'strict', None),
- ('\udc80', 'ignore', b''),
- ('\udc80', 'replace', b'?'),
- ('\udc80', 'backslashreplace', b'\\udc80'),
- ('\udc80', 'namereplace', b'\\udc80'),
- ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
- ))
- else:
- tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
for text, errors, expected in tests:
if expected is not None:
try:
@@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.TestCase):
(b'[\xff]', 'ignore', '[]'),
(b'[\xff]', 'replace', '[\ufffd]'),
(b'[\xff]', 'surrogateescape', '[\udcff]'),
+ (b'[\xed\xb2\x80]', 'strict', None),
+ (b'[\xed\xb2\x80]', 'ignore', '[]'),
+ (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
]
- if VISTA_OR_LATER:
- tests.extend((
- (b'[\xed\xb2\x80]', 'strict', None),
- (b'[\xed\xb2\x80]', 'ignore', '[]'),
- (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
- ))
- else:
- tests.extend((
- (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
- ))
for raw, errors, expected in tests:
if expected is not None:
try:
@@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
self.assertRaises(UnicodeDecodeError,
raw.decode, 'cp65001', errors)
- @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
def test_lone_surrogates(self):
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
@@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
b'[?]')
- @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
def test_surrogatepass_handler(self):
self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
b"abc\xed\xa0\x80def")
@@ -1951,6 +1932,8 @@ all_unicode_encodings = [
if hasattr(codecs, "mbcs_encode"):
all_unicode_encodings.append("mbcs")
+if hasattr(codecs, "oem_encode"):
+ all_unicode_encodings.append("oem")
# The following encoding is not tested, because it's not supposed
# to work:
@@ -3119,11 +3102,10 @@ class CodePageTest(unittest.TestCase):
(b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
(b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
))
- if VISTA_OR_LATER:
- self.check_encode(self.CP_UTF8, (
- ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
- ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
- ))
+ self.check_encode(self.CP_UTF8, (
+ ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
+ ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
+ ))
def test_incremental(self):
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
@@ -3144,6 +3126,20 @@ class CodePageTest(unittest.TestCase):
False)
self.assertEqual(decoded, ('abc', 3))
+ def test_mbcs_alias(self):
+ # Check that looking up our 'default' codepage will return
+ # mbcs when we don't have a more specific one available
+ import _bootlocale
+ def _get_fake_codepage(*a):
+ return 'cp123'
+ old_getpreferredencoding = _bootlocale.getpreferredencoding
+ _bootlocale.getpreferredencoding = _get_fake_codepage
+ try:
+ codec = codecs.lookup('cp123')
+ self.assertEqual(codec.name, 'mbcs')
+ finally:
+ _bootlocale.getpreferredencoding = old_getpreferredencoding
+
class ASCIITest(unittest.TestCase):
def test_encode(self):