summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Mark Dickinson <dickinsm@gmail.com>, 2009-07-28 17:22:36 (GMT)
committer: Mark Dickinson <dickinsm@gmail.com>, 2009-07-28 17:22:36 (GMT)
commit: 1f268285ff810681612c8f7c91c1faeb70535f52 (patch)
tree: 8531e2c66af998125d53694c82626e8a1a204364 /Lib
parent: 6bd13fbbc8b4917023d124d6523685d456a8e92a (diff)
download: cpython-1f268285ff810681612c8f7c91c1faeb70535f52.zip
cpython-1f268285ff810681612c8f7c91c1faeb70535f52.tar.gz
cpython-1f268285ff810681612c8f7c91c1faeb70535f52.tar.bz2
Issue #6561: '\d' in a regular expression should match only Unicode
character category [Nd], not [No].
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_re.py 21
1 files changed, 21 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 383b56a..8b4d268 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -605,6 +605,27 @@ class ReTests(unittest.TestCase):
self.assertEqual(next(iter).span(), (4, 4))
self.assertRaises(StopIteration, next, iter)
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            '\u0e58', # '\N{THAI DIGIT EIGHT}', category 'Nd'
+            '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match(r'^\d$', x).group(0), x)
+
+        not_decimal_digits = [
+            '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match(r'^\d$', x))
+
def test_empty_array(self):
# SF buf 1647541
import array