summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2022-04-23 15:49:23 (GMT)
committerGitHub <noreply@github.com>2022-04-23 15:49:23 (GMT)
commit28890427c58d30f1041b36859733159475c67496 (patch)
tree988acc020e6a46d0717bf9021f46c7b57b2b9e07 /Lib
parent92c1037afc28d9d22e43b275c5e8fae41729ec1c (diff)
downloadcpython-28890427c58d30f1041b36859733159475c67496.zip
cpython-28890427c58d30f1041b36859733159475c67496.tar.gz
cpython-28890427c58d30f1041b36859733159475c67496.tar.bz2
RE: Pre-split the list of opcode names (GH-91859)
1. It makes them interned. 2. It allows adding comments to individual opcodes.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/re/_constants.py141
1 files changed, 71 insertions, 70 deletions
diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py
index 5e999de..aa1a590 100644
--- a/Lib/re/_constants.py
+++ b/Lib/re/_constants.py
@@ -64,88 +64,89 @@ class _NamedIntConstant(int):
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
-def _makecodes(names):
- names = names.strip().split()
+def _makecodes(*names):
items = [_NamedIntConstant(i, name) for i, name in enumerate(names)]
globals().update({item.name: item for item in items})
return items
# operators
-# failure=0 success=1 (just because it looks better that way :-)
-OPCODES = _makecodes("""
- FAILURE SUCCESS
-
- ANY ANY_ALL
- ASSERT ASSERT_NOT
- AT
- BRANCH
- CALL
- CATEGORY
- CHARSET BIGCHARSET
- GROUPREF GROUPREF_EXISTS
- IN
- INFO
- JUMP
- LITERAL
- MARK
- MAX_UNTIL
- MIN_UNTIL
- NOT_LITERAL
- NEGATE
- RANGE
- REPEAT
- REPEAT_ONE
- SUBPATTERN
- MIN_REPEAT_ONE
- ATOMIC_GROUP
- POSSESSIVE_REPEAT
- POSSESSIVE_REPEAT_ONE
-
- GROUPREF_IGNORE
- IN_IGNORE
- LITERAL_IGNORE
- NOT_LITERAL_IGNORE
-
- GROUPREF_LOC_IGNORE
- IN_LOC_IGNORE
- LITERAL_LOC_IGNORE
- NOT_LITERAL_LOC_IGNORE
-
- GROUPREF_UNI_IGNORE
- IN_UNI_IGNORE
- LITERAL_UNI_IGNORE
- NOT_LITERAL_UNI_IGNORE
- RANGE_UNI_IGNORE
-
- MIN_REPEAT MAX_REPEAT
-""")
+OPCODES = _makecodes(
+ # failure=0 success=1 (just because it looks better that way :-)
+ 'FAILURE', 'SUCCESS',
+
+ 'ANY', 'ANY_ALL',
+ 'ASSERT', 'ASSERT_NOT',
+ 'AT',
+ 'BRANCH',
+ 'CALL',
+ 'CATEGORY',
+ 'CHARSET', 'BIGCHARSET',
+ 'GROUPREF', 'GROUPREF_EXISTS',
+ 'IN',
+ 'INFO',
+ 'JUMP',
+ 'LITERAL',
+ 'MARK',
+ 'MAX_UNTIL',
+ 'MIN_UNTIL',
+ 'NOT_LITERAL',
+ 'NEGATE',
+ 'RANGE',
+ 'REPEAT',
+ 'REPEAT_ONE',
+ 'SUBPATTERN',
+ 'MIN_REPEAT_ONE',
+ 'ATOMIC_GROUP',
+ 'POSSESSIVE_REPEAT',
+ 'POSSESSIVE_REPEAT_ONE',
+
+ 'GROUPREF_IGNORE',
+ 'IN_IGNORE',
+ 'LITERAL_IGNORE',
+ 'NOT_LITERAL_IGNORE',
+
+ 'GROUPREF_LOC_IGNORE',
+ 'IN_LOC_IGNORE',
+ 'LITERAL_LOC_IGNORE',
+ 'NOT_LITERAL_LOC_IGNORE',
+
+ 'GROUPREF_UNI_IGNORE',
+ 'IN_UNI_IGNORE',
+ 'LITERAL_UNI_IGNORE',
+ 'NOT_LITERAL_UNI_IGNORE',
+ 'RANGE_UNI_IGNORE',
+
+ # The following opcodes occur only in the parser output,
+ # not in the compiled code.
+ 'MIN_REPEAT', 'MAX_REPEAT',
+)
del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
# positions
-ATCODES = _makecodes("""
- AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
- AT_BOUNDARY AT_NON_BOUNDARY
- AT_END AT_END_LINE AT_END_STRING
+ATCODES = _makecodes(
+ 'AT_BEGINNING', 'AT_BEGINNING_LINE', 'AT_BEGINNING_STRING',
+ 'AT_BOUNDARY', 'AT_NON_BOUNDARY',
+ 'AT_END', 'AT_END_LINE', 'AT_END_STRING',
- AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
+ 'AT_LOC_BOUNDARY', 'AT_LOC_NON_BOUNDARY',
- AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
-""")
+ 'AT_UNI_BOUNDARY', 'AT_UNI_NON_BOUNDARY',
+)
# categories
-CHCODES = _makecodes("""
- CATEGORY_DIGIT CATEGORY_NOT_DIGIT
- CATEGORY_SPACE CATEGORY_NOT_SPACE
- CATEGORY_WORD CATEGORY_NOT_WORD
- CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
-
- CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
-
- CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
- CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
- CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
- CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
-""")
+CHCODES = _makecodes(
+ 'CATEGORY_DIGIT', 'CATEGORY_NOT_DIGIT',
+ 'CATEGORY_SPACE', 'CATEGORY_NOT_SPACE',
+ 'CATEGORY_WORD', 'CATEGORY_NOT_WORD',
+ 'CATEGORY_LINEBREAK', 'CATEGORY_NOT_LINEBREAK',
+
+ 'CATEGORY_LOC_WORD', 'CATEGORY_LOC_NOT_WORD',
+
+ 'CATEGORY_UNI_DIGIT', 'CATEGORY_UNI_NOT_DIGIT',
+ 'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
+ 'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
+ 'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
+)
# replacement operations for "ignore case" mode