diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-03-21 16:28:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-21 16:28:22 (GMT) |
commit | 345b390ed69f36681dbc41187bc8f49cd9135b54 (patch) | |
tree | 31ce6451bed718405b29bdb32c7eb4ff96fe5697 /Lib/sre_compile.py | |
parent | 2bde6827ea4f136297b2d882480b981ff26262b6 (diff) | |
download | cpython-345b390ed69f36681dbc41187bc8f49cd9135b54.zip cpython-345b390ed69f36681dbc41187bc8f49cd9135b54.tar.gz cpython-345b390ed69f36681dbc41187bc8f49cd9135b54.tar.bz2 |
bpo-433030: Add support of atomic grouping in regular expressions (GH-31982)
* Atomic grouping: (?>...).
* Possessive quantifiers: x++, x*+, x?+, x{m,n}+.
Equivalent to (?>x+), (?>x*), (?>x?), (?>x{m,n}).
Co-authored-by: Jeffrey C. Jacobs <timehorse@users.sourceforge.net>
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- | Lib/sre_compile.py | 38 |
1 files changed, 27 insertions, 11 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index c6398bf..0867200 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -17,11 +17,16 @@ from sre_constants import * assert _sre.MAGIC == MAGIC, "SRE module mismatch" _LITERAL_CODES = {LITERAL, NOT_LITERAL} -_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT} _SUCCESS_CODES = {SUCCESS, FAILURE} _ASSERT_CODES = {ASSERT, ASSERT_NOT} _UNIT_CODES = _LITERAL_CODES | {ANY, IN} +_REPEATING_CODES = { + MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE), + MAX_REPEAT: (REPEAT, MAX_UNTIL, REPEAT_ONE), + POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE), +} + # Sets of lowercase characters which have the same uppercase. _equivalences = ( # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I @@ -138,10 +143,7 @@ def _compile(code, pattern, flags): if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op,)) if _simple(av[2]): - if op is MAX_REPEAT: - emit(REPEAT_ONE) - else: - emit(MIN_REPEAT_ONE) + emit(REPEATING_CODES[op][2]) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) @@ -149,16 +151,13 @@ def _compile(code, pattern, flags): emit(SUCCESS) code[skip] = _len(code) - skip else: - emit(REPEAT) + emit(REPEATING_CODES[op][0]) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip - if op is MAX_REPEAT: - emit(MAX_UNTIL) - else: - emit(MIN_UNTIL) + emit(REPEATING_CODES[op][1]) elif op is SUBPATTERN: group, add_flags, del_flags, p = av if group: @@ -169,6 +168,17 @@ def _compile(code, pattern, flags): if group: emit(MARK) emit((group-1)*2+1) + elif op is ATOMIC_GROUP: + # Atomic Groups are handled by starting with an Atomic + # Group op code, then putting in the atomic group pattern + # and finally a success op code to tell any repeat + # operations within the Atomic Group to stop eating and + # pop their stack if they reach it + emit(ATOMIC_GROUP) + skip = _len(code); emit(0) + _compile(code, av, flags) + emit(SUCCESS) + code[skip] = _len(code) - skip elif op in SUCCESS_CODES: emit(op) elif op in ASSERT_CODES: @@ -709,7 +719,8 @@ def dis(code): else: print_(FAILURE) i += 1 - elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE): + elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE, + POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE): skip, min, max = code[i: i+3] if max == MAXREPEAT: max = 'MAXREPEAT' @@ -725,6 +736,11 @@ def dis(code): print_(op, skip, arg, to=i+skip) dis_(i+2, i+skip) i += skip + elif op is ATOMIC_GROUP: + skip = code[i] + print_(op, skip, to=i+skip) + dis_(i+1, i+skip) + i += skip elif op is INFO: skip, flags, min, max = code[i: i+4] if max == MAXREPEAT: |