diff options
author | Hai Shi <shihai1992@gmail.com> | 2020-10-14 15:43:31 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-14 15:43:31 (GMT) |
commit | c5b049b91ca50c615f9a5425055c2b79a82ac547 (patch) | |
tree | 7fac1361bbd7bb7ca533f034d800e593b32266b4 | |
parent | b4d895336a4692c95b4533adcc5c63a489e5e4e4 (diff) | |
download | cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.zip cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.gz cpython-c5b049b91ca50c615f9a5425055c2b79a82ac547.tar.bz2 |
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
-rw-r--r-- | Doc/whatsnew/3.10.rst | 5 | ||||
-rw-r--r-- | Lib/encodings/__init__.py | 3 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 14 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst | 1 |
4 files changed, 21 insertions, 2 deletions
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index c8ddcd2..738ef97 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
 support is provided by the underlying ncurses library.
 (Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
 
+encodings
+---------
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
+(Contributed by Hai Shi in :issue:`39337`.)
+
 glob
 ----
 
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ddd5afd..4b37d33 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
             punct = False
         else:
             punct = True
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index ddf4e08..09ceef7 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
 class CodecNameNormalizationTest(unittest.TestCase):
     """Test codec name normalization"""
 
-    def test_normalized_encoding(self):
+    def test_codecs_lookup(self):
         FOUND = (1, 2, 3, 4)
         NOT_FOUND = (None, None, None, None)
         def search_function(encoding):
@@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
         self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
+    def test_encodings_normalize_encoding(self):
+        # encodings.normalize_encoding() ignores non-ASCII characters.
+        normalize = encodings.normalize_encoding
+        self.assertEqual(normalize('utf_8'), 'utf_8')
+        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+        self.assertEqual(normalize('utf 8'), 'utf_8')
+        # encodings.normalize_encoding() doesn't convert
+        # characters to lower case.
+        self.assertEqual(normalize('UTF 8'), 'UTF_8')
+        self.assertEqual(normalize('utf.8'), 'utf.8')
+        self.assertEqual(normalize('utf...8'), 'utf...8')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
new file mode 100644
index 0000000..c2b4dbe
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1 @@
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.