diff options
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r-- | Lib/sre_parse.py | 114 |
1 files changed, 84 insertions, 30 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 521e379..09f3be2 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -65,6 +65,12 @@ FLAGS = { "u": SRE_FLAG_UNICODE, } +GLOBAL_FLAGS = (SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE | + SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE) + +class Verbose(Exception): + pass + class Pattern: # master pattern object. keeps track of global attributes def __init__(self): @@ -184,7 +190,7 @@ class SubPattern: lo = lo + i hi = hi + j elif op is SUBPATTERN: - i, j = av[1].getwidth() + i, j = av[-1].getwidth() lo = lo + i hi = hi + j elif op in _REPEATCODES: @@ -395,7 +401,7 @@ def _escape(source, escape, state): pass raise source.error("bad escape %s" % escape, len(escape)) -def _parse_sub(source, state, nested=True): +def _parse_sub(source, state, verbose, nested=True): # parse an alternation: a|b|c items = [] @@ -403,7 +409,7 @@ def _parse_sub(source, state, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state)) + itemsappend(_parse(source, state, verbose)) if not sourcematch("|"): break @@ -445,10 +451,10 @@ def _parse_sub(source, state, nested=True): subpattern.append((BRANCH, (None, items))) return subpattern -def _parse_sub_cond(source, state, condgroup): - item_yes = _parse(source, state) +def _parse_sub_cond(source, state, condgroup, verbose): + item_yes = _parse(source, state, verbose) if source.match("|"): - item_no = _parse(source, state) + item_no = _parse(source, state, verbose) if source.next == "|": raise source.error("conditional backref with more than two branches") else: @@ -457,7 +463,7 @@ def _parse_sub_cond(source, state, condgroup): subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern -def _parse(source, state): +def _parse(source, state, verbose): # parse a simple pattern subpattern = SubPattern(state) @@ -467,7 +473,6 @@ def _parse(source, state): sourcematch = source.match _len = len _ord = ord - verbose = state.flags & SRE_FLAG_VERBOSE while True: @@ -621,6 +626,8 @@ def _parse(source, state): group = True name = None condgroup = None + add_flags = 0 + del_flags = 0 if sourcematch("?"): # options char = sourceget() @@ -682,7 +689,7 @@ def _parse(source, state): lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups - p = _parse_sub(source, state) + p = _parse_sub(source, state, verbose) if dir < 0: if lookbehindgroups is None: state.lookbehindgroups = None @@ -718,19 +725,13 @@ def _parse(source, state): raise source.error("invalid group reference", len(condname) + 1) state.checklookbehindgroup(condgroup, source) - elif char in FLAGS: + elif char in FLAGS or char == "-": # flags - while True: - state.flags |= FLAGS[char] - char = sourceget() - if char is None: - raise source.error("missing )") - if char == ")": - break - if char not in FLAGS: - raise source.error("unknown flag", len(char)) - verbose = state.flags & SRE_FLAG_VERBOSE - continue + flags = _parse_flags(source, state, char) + if flags is None: # global flags + continue + add_flags, del_flags = flags + group = None else: raise source.error("unknown extension ?" + char, len(char) + 1) @@ -742,15 +743,17 @@ def _parse(source, state): except error as err: raise source.error(err.msg, len(name) + 1) from None if condgroup: - p = _parse_sub_cond(source, state, condgroup) + p = _parse_sub_cond(source, state, condgroup, verbose) else: - p = _parse_sub(source, state) + sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and + not (del_flags & SRE_FLAG_VERBOSE)) + p = _parse_sub(source, state, sub_verbose) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) if group is not None: state.closegroup(group, p) - subpatternappend((SUBPATTERN, (group, p))) + subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p))) elif this == "^": subpatternappend((AT, AT_BEGINNING)) @@ -763,6 +766,53 @@ def _parse(source, state): return subpattern +def _parse_flags(source, state, char): + sourceget = source.get + add_flags = 0 + del_flags = 0 + if char != "-": + while True: + add_flags |= FLAGS[char] + char = sourceget() + if char is None: + raise source.error("missing -, : or )") + if char in ")-:": + break + if char not in FLAGS: + msg = "unknown flag" if char.isalpha() else "missing -, : or )" + raise source.error(msg, len(char)) + if char == ")": + if ((add_flags & SRE_FLAG_VERBOSE) and + not (state.flags & SRE_FLAG_VERBOSE)): + raise Verbose + state.flags |= add_flags + return None + if add_flags & GLOBAL_FLAGS: + raise source.error("bad inline flags: cannot turn on global flag", 1) + if char == "-": + char = sourceget() + if char is None: + raise source.error("missing flag") + if char not in FLAGS: + msg = "unknown flag" if char.isalpha() else "missing flag" + raise source.error(msg, len(char)) + while True: + del_flags |= FLAGS[char] + char = sourceget() + if char is None: + raise source.error("missing :") + if char == ":": + break + if char not in FLAGS: + msg = "unknown flag" if char.isalpha() else "missing :" + raise source.error(msg, len(char)) + assert char == ":" + if del_flags & GLOBAL_FLAGS: + raise source.error("bad inline flags: cannot turn off global flag", 1) + if add_flags & del_flags: + raise source.error("bad inline flags: flag turned on and off", 1) + return add_flags, del_flags + def fix_flags(src, flags): # Check and fix flags according to the type of pattern (str or bytes) if isinstance(src, str): @@ -789,18 +839,22 @@ def parse(str, flags=0, pattern=None): pattern.flags = flags pattern.str = str - p = _parse_sub(source, pattern, 0) + try: + p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False) + except Verbose: + # the VERBOSE flag was switched on inside the pattern. to be + # on the safe side, we'll parse the whole thing again... + pattern = Pattern() + pattern.flags = flags | SRE_FLAG_VERBOSE + pattern.str = str + p = _parse_sub(source, pattern, True, False) + p.pattern.flags = fix_flags(str, p.pattern.flags) if source.next is not None: assert source.next == ")" raise source.error("unbalanced parenthesis") - if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: - # the VERBOSE flag was switched on inside the pattern. to be - # on the safe side, we'll parse the whole thing again... - return parse(str, p.pattern.flags) - if flags & SRE_FLAG_DEBUG: p.dump() |