diff options
author | Mark Dickinson <dickinsm@gmail.com> | 2009-07-28 20:35:03 (GMT) |
---|---|---|
committer | Mark Dickinson <dickinsm@gmail.com> | 2009-07-28 20:35:03 (GMT) |
commit | fe67bd91685f89fbf95ee9727ce03d20dea3e9b8 (patch) | |
tree | a05a8cfba86d1994b8e8af49ec5cef9924760008 /Lib | |
parent | 8d31f5413ccbd1857bac21887272f06a84cca619 (diff) | |
download | cpython-fe67bd91685f89fbf95ee9727ce03d20dea3e9b8.zip cpython-fe67bd91685f89fbf95ee9727ce03d20dea3e9b8.tar.gz cpython-fe67bd91685f89fbf95ee9727ce03d20dea3e9b8.tar.bz2 |
Issue #6561: '\d' regular expression should not match characters of
category [No]; only those of category [Nd]. (Backport of r74237
from py3k.)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_re.py | 21 |
1 file changed, 21 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 4f543d9..c4cc820 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -636,6 +636,27 @@ class ReTests(unittest.TestCase):
         self.assertEqual(iter.next().span(), (4, 4))
         self.assertRaises(StopIteration, iter.next)
 
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
+
+        not_decimal_digits = [
+            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
+
     def test_empty_array(self):
         # SF buf 1647541
         import array
```