diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 (GMT) |
commit | fd036451bf0e0ade8783e21df801abf7be96d020 (patch) | |
tree | e70ff65a9e641d8e790bc091f0dc2507baf344ca /Lib/tokenize.py | |
parent | 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff) | |
download | cpython-fd036451bf0e0ade8783e21df801abf7be96d020.zip cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.bz2 |
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII
flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 14 |
1 files changed, 8 insertions, 6 deletions
```diff
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 31366de..ec5a79a 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -47,21 +47,23 @@ def group(*choices): return '(' + '|'.join(choices) + ')'
 def any(*choices): return group(*choices) + '*'
 def maybe(*choices): return group(*choices) + '?'
 
+# Note: we use unicode matching for names ("\w") but ascii matching for
+# number literals.
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
 Name = r'[a-zA-Z_]\w*'
 
-Hexnumber = r'0[xX][\da-fA-F]+'
+Hexnumber = r'0[xX][0-9a-fA-F]+'
 Binnumber = r'0[bB][01]+'
 Octnumber = r'0[oO][0-7]+'
-Decnumber = r'(?:0+|[1-9]\d*)'
+Decnumber = r'(?:0+|[1-9][0-9]*)'
 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-Exponent = r'[eE][-+]?\d+'
-Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
-Expfloat = r'\d+' + Exponent
+Exponent = r'[eE][-+]?[0-9]+'
+Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
+Expfloat = r'[0-9]+' + Exponent
 Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
 # Tail end of ' string.
```