diff options
author | Greg Price <gnprice@gmail.com> | 2019-08-14 11:05:19 (GMT) |
---|---|---|
committer | Victor Stinner <vstinner@redhat.com> | 2019-08-14 11:05:19 (GMT) |
commit | 6bccbe7dfb998af862a183f2c36f0d4603af2c29 (patch) | |
tree | 888ea0d9773dd03c3e2a12f918548df151724186 /Lib/test/test_unicode.py | |
parent | 077af8c2c93dd71086e2c5e5ff1e634b6da8f214 (diff) | |
download | cpython-6bccbe7dfb998af862a183f2c36f0d4603af2c29.zip cpython-6bccbe7dfb998af862a183f2c36f0d4603af2c29.tar.gz cpython-6bccbe7dfb998af862a183f2c36f0d4603af2c29.tar.bz2 |
bpo-36502: Correct documentation of str.isspace() (GH-15019)
The documented definition was much broader than the real one:
there are tons of characters with general category "Other",
and we don't (and shouldn't) treat most of them as whitespace.
Rewrite the definition to agree with the comment on
_PyUnicode_IsWhitespace, and with the logic in makeunicodedata.py,
which is what generates that function and so ultimately governs.
Add suitable breadcrumbs so that a reader who wants to pin down
exactly what this definition means (what's a "bidirectional class"
of "B"?) can do so. The `unicodedata` module documentation is an
appropriate central place for our references to Unicode's own copious
documentation, so point there.
Also add to the isspace() test a thorough check that the
implementation agrees with the intended definition.
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r-- | Lib/test/test_unicode.py | 13 |
1 file changed, 12 insertions, 1 deletion
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 8be16c8..7bd7f51 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -12,6 +12,7 @@ import operator
 import struct
 import sys
 import textwrap
+import unicodedata
 import unittest
 import warnings
 from test import support, string_tests
@@ -617,11 +618,21 @@ class UnicodeTest(string_tests.CommonTest,
         self.checkequalnofix(True, '\u2000', 'isspace')
         self.checkequalnofix(True, '\u200a', 'isspace')
         self.checkequalnofix(False, '\u2014', 'isspace')
-        # apparently there are no non-BMP spaces chars in Unicode 6
+        # There are no non-BMP whitespace chars as of Unicode 12.
         for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
                    '\U0001F40D', '\U0001F46F']:
             self.assertFalse(ch.isspace(), '{!a} is not space.'.format(ch))
 
+    @support.requires_resource('cpu')
+    def test_isspace_invariant(self):
+        for codepoint in range(sys.maxunicode + 1):
+            char = chr(codepoint)
+            bidirectional = unicodedata.bidirectional(char)
+            category = unicodedata.category(char)
+            self.assertEqual(char.isspace(),
+                             (bidirectional in ('WS', 'B', 'S')
+                              or category == 'Zs'))
+
     def test_isalnum(self):
         super().test_isalnum()
         for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',