| author | Marta Gómez Macías <mgmacias@google.com> | 2023-05-21 16:07:28 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-21 16:07:28 (GMT) |
| commit | ffe47cb623999db05959ec4b5168d1c87a1e40ef | |
| tree | b561d102b1bbe38b3bf32b5fcd5c179c0e12c00c | |
| parent | 93923793f602ea9117f13bfac8cbe01a864eeb01 | |
gh-104719: Restore Tokenize module constants (#104722)
-rw-r--r-- | Lib/tokenize.py | 101 |
1 file changed, 101 insertions, 0 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index bfe40c6..cef2773 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -56,6 +56,107 @@ class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line'
         else:
             return self.type
 
+def group(*choices): return '(' + '|'.join(choices) + ')'
+def any(*choices): return group(*choices) + '*'
+def maybe(*choices): return group(*choices) + '?'
+
+# Note: we use unicode matching for names ("\w") but ascii matching for
+# number literals.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'\w+'
+
+Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+Binnumber = r'0[bB](?:_?[01])+'
+Octnumber = r'0[oO](?:_?[0-7])+'
+Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                   r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Return the empty string, plus all of the valid string prefixes.
+def _all_string_prefixes():
+    # The valid string prefixes. Only contain the lower case versions,
+    # and don't contain any permutations (include 'fr', but not
+    # 'rf'). The various permutations will be generated.
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+    # if we add binary f-strings, add: ['fb', 'fbr']
+    result = {''}
+    for prefix in _valid_string_prefixes:
+        for t in _itertools.permutations(prefix):
+            # create a list with upper and lower versions of each
+            # character
+            for u in _itertools.product(*[(c, c.upper()) for c in t]):
+                result.add(''.join(u))
+    return result
+
+@functools.lru_cache
+def _compile(expr):
+    return re.compile(expr, re.UNICODE)
+
+# Note that since _all_string_prefixes includes the empty string,
+# StringPrefix can be the empty string (making it optional).
+StringPrefix = group(*_all_string_prefixes())
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+# Single-line ' or " string.
+String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Sorting in reverse order puts the long operators before their prefixes.
+# Otherwise if = came before ==, == would get recognized as two instances
+# of =.
+Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
+Funny = group(r'\r?\n', Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+# For a given string prefix plus quotes, endpats maps it to a regex
+# to match the remainder of that string. _prefix can be empty, for
+# a normal single or triple quoted string (with no prefix).
+endpats = {}
+for _prefix in _all_string_prefixes():
+    endpats[_prefix + "'"] = Single
+    endpats[_prefix + '"'] = Double
+    endpats[_prefix + "'''"] = Single3
+    endpats[_prefix + '"""'] = Double3
+del _prefix
+
+# A set of all of the single and triple quoted string prefixes,
+# including the opening quotes.
+single_quoted = set()
+triple_quoted = set()
+for t in _all_string_prefixes():
+    for u in (t + '"', t + "'"):
+        single_quoted.add(u)
+    for u in (t + '"""', t + "'''"):
+        triple_quoted.add(u)
+del t, u
+
+tabsize = 8
+
 class TokenError(Exception): pass
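For context, the hunk above restores the regex-building helpers and token patterns that earlier releases exposed at module level, so third-party code that imports names such as `tokenize.Number` or `tokenize.group` keeps working. The snippet below is a minimal sketch of how such code typically consumes the restored constants; it assumes an interpreter that includes this patch, and the sample literals and the `version_re` pattern are illustrative only.

```python
# Sketch: consuming the restored module-level constants.
# Assumes a Python build that contains this patch; sample inputs are made up.
import re
import tokenize

# tokenize.Number is the full pattern for int/float/imaginary literals,
# composed above from Hexnumber, Floatnumber, Imagnumber, ...
number_re = re.compile(tokenize.Number)

for literal in ("0x_FF", "1_000", "3.14e-2", "10j"):
    print(literal, "->", bool(number_re.fullmatch(literal)))

# group()/maybe()/any() are plain string combinators, so callers can keep
# composing their own patterns in the same style as the module itself.
version_re = re.compile(
    tokenize.group(r"[0-9]+") + r"\." + tokenize.group(r"[0-9]+")
)
print(version_re.fullmatch("3.12") is not None)
```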
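The string-prefix machinery is easiest to see by inspecting the generated tables: `_all_string_prefixes()` expands the base prefixes into every case and order permutation, `single_quoted`/`triple_quoted` collect each prefix joined with its opening quotes, and `endpats` maps that opening to the regex for the remainder of the string. A rough illustration, again assuming the patched module; the sample text is made up.

```python
# Sketch: looking up the prefix tables restored by this commit.
import re
import tokenize

# triple_quoted holds every prefix/quote combination that can open a
# triple-quoted string, e.g. "'''" and 'rb"""'.
print("'''" in tokenize.triple_quoted)    # True
print('rb"""' in tokenize.triple_quoted)  # True

# endpats maps an opening prefix+quote to the pattern for the rest of the
# string, so a scanner that has just consumed `rb"""` can find the close.
end_re = re.compile(tokenize.endpats['rb"""'])
rest_of_line = 'payload with a " inside""" + tail'
m = end_re.match(rest_of_line)
print(rest_of_line[:m.end()])  # everything up to and including the closing """
```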
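`PseudoToken` (optional leading whitespace followed by one alternation over comments, numbers, operators, string openers and names) is the pattern the older pure-Python tokenizer loop used to advance through a line, one match per token. A small sketch of that stepping under the same assumption that this patch is applied; the input line is arbitrary.

```python
# Sketch: step through one logical line the way the old pure-Python
# tokenizer loop did, using the restored PseudoToken pattern.
import re
import tokenize

pseudo = re.compile(tokenize.PseudoToken)
line = "spam = maybe(x) + 1  # trailing comment\n"

pos = 0
while pos < len(line):
    m = pseudo.match(line, pos)
    if m is None:
        break              # nothing recognizable at this position
    token = m.group(1)     # group 1 is the token text after the whitespace
    if not token:          # empty match (e.g. \Z): only whitespace remains
        break
    print(repr(token))
    pos = m.end()
```

Run on the sample line, this prints the token texts 'spam', '=', 'maybe', '(', 'x', ')', '+', '1', the comment, and the newline.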