summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarta Gómez Macías <mgmacias@google.com>2023-05-21 16:07:28 (GMT)
committerGitHub <noreply@github.com>2023-05-21 16:07:28 (GMT)
commitffe47cb623999db05959ec4b5168d1c87a1e40ef (patch)
treeb561d102b1bbe38b3bf32b5fcd5c179c0e12c00c
parent93923793f602ea9117f13bfac8cbe01a864eeb01 (diff)
downloadcpython-ffe47cb623999db05959ec4b5168d1c87a1e40ef.zip
cpython-ffe47cb623999db05959ec4b5168d1c87a1e40ef.tar.gz
cpython-ffe47cb623999db05959ec4b5168d1c87a1e40ef.tar.bz2
gh-104719: Restore Tokenize module constants (#104722)
-rw-r--r--Lib/tokenize.py101
1 files changed, 101 insertions, 0 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index bfe40c6..cef2773 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -56,6 +56,107 @@ class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line'
else:
return self.type
+def group(*choices): return '(' + '|'.join(choices) + ')'
+def any(*choices): return group(*choices) + '*'
+def maybe(*choices): return group(*choices) + '?'
+
+# Note: we use unicode matching for names ("\w") but ascii matching for
+# number literals.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'\w+'
+
+Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+Binnumber = r'0[bB](?:_?[01])+'
+Octnumber = r'0[oO](?:_?[0-7])+'
+Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+ r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Return the empty string, plus all of the valid string prefixes.
+def _all_string_prefixes():
+ # The valid string prefixes. Only contain the lower case versions,
+ # and don't contain any permutations (include 'fr', but not
+ # 'rf'). The various permutations will be generated.
+ _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+ # if we add binary f-strings, add: ['fb', 'fbr']
+ result = {''}
+ for prefix in _valid_string_prefixes:
+ for t in _itertools.permutations(prefix):
+ # create a list with upper and lower versions of each
+ # character
+ for u in _itertools.product(*[(c, c.upper()) for c in t]):
+ result.add(''.join(u))
+ return result
+
+@functools.lru_cache
+def _compile(expr):
+ return re.compile(expr, re.UNICODE)
+
+# Note that since _all_string_prefixes includes the empty string,
+# StringPrefix can be the empty string (making it optional).
+StringPrefix = group(*_all_string_prefixes())
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+# Single-line ' or " string.
+String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+ StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Sorting in reverse order puts the long operators before their prefixes.
+# Otherwise if = came before ==, == would get recognized as two instances
+# of =.
+Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
+Funny = group(r'\r?\n', Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ group("'", r'\\\r?\n'),
+ StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+ group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+# For a given string prefix plus quotes, endpats maps it to a regex
+# to match the remainder of that string. _prefix can be empty, for
+# a normal single or triple quoted string (with no prefix).
+endpats = {}
+for _prefix in _all_string_prefixes():
+ endpats[_prefix + "'"] = Single
+ endpats[_prefix + '"'] = Double
+ endpats[_prefix + "'''"] = Single3
+ endpats[_prefix + '"""'] = Double3
+del _prefix
+
+# A set of all of the single and triple quoted string prefixes,
+# including the opening quotes.
+single_quoted = set()
+triple_quoted = set()
+for t in _all_string_prefixes():
+ for u in (t + '"', t + "'"):
+ single_quoted.add(u)
+ for u in (t + '"""', t + "'''"):
+ triple_quoted.add(u)
+del t, u
+
+tabsize = 8
class TokenError(Exception): pass