| author | Albert-Jan Nijburg <albertjan@trinket.io> | 2017-05-31 14:00:21 (GMT) |
|---|---|---|
| committer | Victor Stinner <victor.stinner@gmail.com> | 2017-05-31 14:00:21 (GMT) |
| commit | fc354f07855a9197e71f851ad930cbf5652f9160 (patch) | |
| tree | 5c7c6ad2a4de72993d916c321156572d4fa4635d /Lib | |
| parent | 85aba238e49abd2d5a604102981d28a50f305443 (diff) | |
bpo-25324: copy tok_name before changing it (#1608)
* add test to check if we're modifying token
* copy list so `import tokenize` doesn't have side effects on token (see the sketch after this list)
* shorten line
* add tokenize tokens to token.h to get them to show up in token
* move ERRORTOKEN back to its previous location, and fix nitpick
* copy comments from token.h automatically
* fix whitespace and make more pythonic
* changes to address review comments from @haypo
* update token.rst and Misc/NEWS
* change wording
* some more wording changes
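
The heart of the fix: importing `tokenize` used to mutate the `token` module at import time by adding COMMENT, NL and ENCODING to the shared `tok_name` dict; those names now live in `token.py` itself. Below is a minimal sketch of how to observe that the side effect is gone. It is not the test added by the commit, and it assumes an interpreter that already includes this patch:

```python
# Minimal sketch, assuming an interpreter that includes this patch;
# this is not the test added by the commit.
import token

names_before = dict(token.tok_name)

# Before this fix, the import below added COMMENT, NL and ENCODING to
# token.tok_name as a side effect, because tokenize mutated the shared dict.
import tokenize  # noqa: F401

assert token.tok_name == names_before, "importing tokenize modified token.tok_name"
print("importing tokenize left token.tok_name untouched")
```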
Diffstat (limited to 'Lib')
| -rw-r--r-- | Lib/test/test_tokenize.py | 12 |
| -rw-r--r-- | Lib/token.py | 29 |
| -rw-r--r-- | Lib/tokenize.py | 11 |
3 files changed, 32 insertions, 20 deletions
```diff
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index dcaf58f..538612c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1343,13 +1343,13 @@ class TestTokenize(TestCase):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
         self.assertEqual(len(tokens), 2 + num_optypes)
-        self.assertEqual(token.tok_name[tokens[0].exact_type],
-                         token.tok_name[ENCODING])
+        self.assertEqual(tok_name[tokens[0].exact_type],
+                         tok_name[ENCODING])
         for i in range(num_optypes):
-            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
-                             token.tok_name[optypes[i]])
-        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
-                         token.tok_name[token.ENDMARKER])
+            self.assertEqual(tok_name[tokens[i + 1].exact_type],
+                             tok_name[optypes[i]])
+        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
+                         tok_name[token.ENDMARKER])
 
     def test_exact_type(self):
         self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
diff --git a/Lib/token.py b/Lib/token.py
index 5fdb222..091f80b 100644
--- a/Lib/token.py
+++ b/Lib/token.py
@@ -63,11 +63,17 @@ AT = 49
 ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
+# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
 OP = 53
 AWAIT = 54
 ASYNC = 55
 ERRORTOKEN = 56
-N_TOKENS = 57
+# These aren't used by the C tokenizer but are needed for tokenize.py
+COMMENT = 57
+NL = 58
+ENCODING = 59
+N_TOKENS = 60
+# Special definitions for cooperation with parser
 NT_OFFSET = 256
 
 #--end constants--
@@ -102,15 +108,26 @@ def _main():
     with fp:
         lines = fp.read().split("\n")
     prog = re.compile(
-        "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
+        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
         re.IGNORECASE)
+    comment_regex = re.compile(
+        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
+        re.IGNORECASE)
+
     tokens = {}
+    prev_val = None
     for line in lines:
         match = prog.match(line)
         if match:
             name, val = match.group(1, 2)
             val = int(val)
-            tokens[val] = name  # reverse so we can sort them...
+            tokens[val] = {'token': name}  # reverse so we can sort them...
+            prev_val = val
+        else:
+            comment_match = comment_regex.match(line)
+            if comment_match and prev_val is not None:
+                comment = comment_match.group(1)
+                tokens[prev_val]['comment'] = comment
     keys = sorted(tokens.keys())
     # load the output skeleton from the target:
     try:
@@ -127,8 +144,10 @@ def _main():
         sys.stderr.write("target does not contain format markers")
         sys.exit(3)
     lines = []
-    for val in keys:
-        lines.append("%s = %d" % (tokens[val], val))
+    for key in keys:
+        lines.append("%s = %d" % (tokens[key]["token"], key))
+        if "comment" in tokens[key]:
+            lines.append("# %s" % tokens[key]["comment"])
     format[start:end] = lines
     try:
         fp = open(outFileName, 'w')
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9017bb1..5fa4152 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -38,17 +38,10 @@ cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
 import token
-__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
-                           "NL", "untokenize", "ENCODING", "TokenInfo"]
+__all__ = token.__all__ + ["tokenize", "detect_encoding",
+                           "untokenize", "TokenInfo"]
 del token
 
-COMMENT = N_TOKENS
-tok_name[COMMENT] = 'COMMENT'
-NL = N_TOKENS + 1
-tok_name[NL] = 'NL'
-ENCODING = N_TOKENS + 2
-tok_name[ENCODING] = 'ENCODING'
-N_TOKENS += 3
 EXACT_TOKEN_TYPES = {
     '(': LPAR,
     ')': RPAR,
```
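
With the change in place, COMMENT, NL and ENCODING are ordinary constants defined in `Lib/token.py` (and still re-exported by `tokenize`), so they can be looked up without importing `tokenize` at all. A small usage sketch, assuming an interpreter that includes this commit:

```python
# Usage sketch, assuming an interpreter that includes this change:
# the tokenize-only token types now live in the token module itself.
import token

for name in ("COMMENT", "NL", "ENCODING"):
    value = getattr(token, name)
    print(name, value, token.tok_name[value])
```

The `_main()` helper shown in the `Lib/token.py` hunk is what regenerates these constants from `Include/token.h`; with this patch it also carries over any trailing `/* ... */` comment from the header as a `#` comment in the generated file.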