summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Hai Shi <shihai1992@gmail.com>, 2020-10-14 15:43:31 (GMT)
committer: GitHub <noreply@github.com>, 2020-10-14 15:43:31 (GMT)
commit: c5b049b91ca50c615f9a5425055c2b79a82ac547 (patch)
tree: 7fac1361bbd7bb7ca533f034d800e593b32266b4 /Lib
parent: b4d895336a4692c95b4533adcc5c63a489e5e4e4 (diff)
download: cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.zip
cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.gz
cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.bz2
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/__init__.py 3
-rw-r--r-- Lib/test/test_codecs.py 14
2 files changed, 15 insertions, 2 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ddd5afd..4b37d33 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
if c.isalnum() or c == '.':
if punct and chars:
chars.append('_')
- chars.append(c)
+ if c.isascii():
+ chars.append(c)
punct = False
else:
punct = True
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index ddf4e08..09ceef7 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
class CodecNameNormalizationTest(unittest.TestCase):
"""Test codec name normalization"""
- def test_normalized_encoding(self):
+ def test_codecs_lookup(self):
FOUND = (1, 2, 3, 4)
NOT_FOUND = (None, None, None, None)
def search_function(encoding):
@@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
+ def test_encodings_normalize_encoding(self):
+ # encodings.normalize_encoding() ignores non-ASCII characters.
+ normalize = encodings.normalize_encoding
+ self.assertEqual(normalize('utf_8'), 'utf_8')
+ self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+ self.assertEqual(normalize('utf 8'), 'utf_8')
+ # encodings.normalize_encoding() doesn't convert
+ # characters to lower case.
+ self.assertEqual(normalize('UTF 8'), 'UTF_8')
+ self.assertEqual(normalize('utf.8'), 'utf.8')
+ self.assertEqual(normalize('utf...8'), 'utf...8')
+
if __name__ == "__main__":
unittest.main()