diff options
author | Hai Shi <shihai1992@gmail.com> | 2020-10-14 15:43:31 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-14 15:43:31 (GMT) |
commit | c5b049b91ca50c615f9a5425055c2b79a82ac547 (patch) | |
tree | 7fac1361bbd7bb7ca533f034d800e593b32266b4 /Lib | |
parent | b4d895336a4692c95b4533adcc5c63a489e5e4e4 (diff) | |
download | cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.zip cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.gz cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.bz2 |
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/__init__.py | 3 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 14 |
2 files changed, 15 insertions, 2 deletions
```diff
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ddd5afd..4b37d33 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
             punct = False
         else:
             punct = True
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index ddf4e08..09ceef7 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
 
 class CodecNameNormalizationTest(unittest.TestCase):
     """Test codec name normalization"""
-    def test_normalized_encoding(self):
+    def test_codecs_lookup(self):
         FOUND = (1, 2, 3, 4)
         NOT_FOUND = (None, None, None, None)
         def search_function(encoding):
@@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
         self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
+    def test_encodings_normalize_encoding(self):
+        # encodings.normalize_encoding() ignores non-ASCII characters.
+        normalize = encodings.normalize_encoding
+        self.assertEqual(normalize('utf_8'), 'utf_8')
+        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+        self.assertEqual(normalize('utf 8'), 'utf_8')
+        # encodings.normalize_encoding() doesn't convert
+        # characters to lower case.
+        self.assertEqual(normalize('UTF 8'), 'UTF_8')
+        self.assertEqual(normalize('utf.8'), 'utf.8')
+        self.assertEqual(normalize('utf...8'), 'utf...8')
+
 
 if __name__ == "__main__":
     unittest.main()
```