diff options
author | Ma Lin <animalize@users.noreply.github.com> | 2022-04-03 16:16:20 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-03 16:16:20 (GMT) |
commit | 6e3eee5c11b539e9aab39cff783acf57838c355a (patch) | |
tree | 29ee6720d249adfe5864ab9f09d7cb54d9f35238 /Lib/re/_compiler.py | |
parent | b82cdd1dac9a9be52051abd90a1ce69236ac41f4 (diff) | |
download | cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.zip cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.tar.gz cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.tar.bz2 |
bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)
Diffstat (limited to 'Lib/re/_compiler.py')
-rw-r--r-- | Lib/re/_compiler.py | 61 |
1 files changed, 39 insertions, 22 deletions
diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index 62da8e5..bedd4b8 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -67,14 +67,21 @@ _equivalences = ( _ignorecase_fixes = {i: tuple(j for j in t if i != j) for t in _equivalences for i in t} +class _CompileData: + __slots__ = ('code', 'repeat_count') + def __init__(self): + self.code = [] + self.repeat_count = 0 + def _combine_flags(flags, add_flags, del_flags, TYPE_FLAGS=_parser.TYPE_FLAGS): if add_flags & TYPE_FLAGS: flags &= ~TYPE_FLAGS return (flags | add_flags) & ~del_flags -def _compile(code, pattern, flags): +def _compile(data, pattern, flags): # internal: compile a (sub)pattern + code = data.code emit = code.append _len = len LITERAL_CODES = _LITERAL_CODES @@ -147,7 +154,7 @@ def _compile(code, pattern, flags): skip = _len(code); emit(0) emit(av[0]) emit(av[1]) - _compile(code, av[2], flags) + _compile(data, av[2], flags) emit(SUCCESS) code[skip] = _len(code) - skip else: @@ -155,7 +162,11 @@ def _compile(code, pattern, flags): skip = _len(code); emit(0) emit(av[0]) emit(av[1]) - _compile(code, av[2], flags) + # now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT) + if op != POSSESSIVE_REPEAT: + emit(data.repeat_count) + data.repeat_count += 1 + _compile(data, av[2], flags) code[skip] = _len(code) - skip emit(REPEATING_CODES[op][1]) elif op is SUBPATTERN: @@ -164,7 +175,7 @@ def _compile(code, pattern, flags): emit(MARK) emit((group-1)*2) # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags)) - _compile(code, p, _combine_flags(flags, add_flags, del_flags)) + _compile(data, p, _combine_flags(flags, add_flags, del_flags)) if group: emit(MARK) emit((group-1)*2+1) @@ -176,7 +187,7 @@ def _compile(code, pattern, flags): # pop their stack if they reach it emit(ATOMIC_GROUP) skip = _len(code); emit(0) - _compile(code, av, flags) + _compile(data, av, flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op in SUCCESS_CODES: @@ -191,13 +202,13 @@ def _compile(code, pattern, flags): if lo != hi: raise error("look-behind requires fixed-width pattern") emit(lo) # look behind - _compile(code, av[1], flags) + _compile(data, av[1], flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is CALL: emit(op) skip = _len(code); emit(0) - _compile(code, av, flags) + _compile(data, av, flags) emit(SUCCESS) code[skip] = _len(code) - skip elif op is AT: @@ -216,7 +227,7 @@ def _compile(code, pattern, flags): for av in av[1]: skip = _len(code); emit(0) # _compile_info(code, av, flags) - _compile(code, av, flags) + _compile(data, av, flags) emit(JUMP) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip @@ -244,12 +255,12 @@ def _compile(code, pattern, flags): emit(op) emit(av[0]-1) skipyes = _len(code); emit(0) - _compile(code, av[1], flags) + _compile(data, av[1], flags) if av[2]: emit(JUMP) skipno = _len(code); emit(0) code[skipyes] = _len(code) - skipyes + 1 - _compile(code, av[2], flags) + _compile(data, av[2], flags) code[skipno] = _len(code) - skipno else: code[skipyes] = _len(code) - skipyes + 1 @@ -608,17 +619,17 @@ def isstring(obj): def _code(p, flags): flags = p.state.flags | flags - code = [] + data = _CompileData() # compile info block - _compile_info(code, p, flags) + _compile_info(data.code, p, flags) # compile the pattern - _compile(code, p.data, flags) + _compile(data, p.data, flags) - code.append(SUCCESS) + data.code.append(SUCCESS) - return code + return data def _hex_code(code): return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code) @@ -719,7 +730,7 @@ def dis(code): else: print_(FAILURE) i += 1 - elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE, + elif op in (REPEAT_ONE, MIN_REPEAT_ONE, POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE): skip, min, max = code[i: i+3] if max == MAXREPEAT: @@ -727,6 +738,13 @@ def dis(code): print_(op, skip, min, max, to=i+skip) dis_(i+3, i+skip) i += skip + elif op is REPEAT: + skip, min, max, repeat_index = code[i: i+4] + if max == MAXREPEAT: + max = 'MAXREPEAT' + print_(op, skip, min, max, repeat_index, to=i+skip) + dis_(i+4, i+skip) + i += skip elif op is GROUPREF_EXISTS: arg, skip = code[i: i+2] print_(op, arg, skip, to=i+skip) @@ -781,11 +799,11 @@ def compile(p, flags=0): else: pattern = None - code = _code(p, flags) + data = _code(p, flags) if flags & SRE_FLAG_DEBUG: print() - dis(code) + dis(data.code) # map in either direction groupindex = p.state.groupdict @@ -794,7 +812,6 @@ def compile(p, flags=0): indexgroup[i] = k return _sre.compile( - pattern, flags | p.state.flags, code, - p.state.groups-1, - groupindex, tuple(indexgroup) - ) + pattern, flags | p.state.flags, data.code, + p.state.groups-1, groupindex, tuple(indexgroup), + data.repeat_count) |