summary | refs | log | tree | commit | diff | stats
path: root/Lib/re/_compiler.py
diff options
context:
space:
mode:
author: Ma Lin <animalize@users.noreply.github.com>, 2022-04-03 16:16:20 (GMT)
committer: GitHub <noreply@github.com>, 2022-04-03 16:16:20 (GMT)
commit: 6e3eee5c11b539e9aab39cff783acf57838c355a (patch)
tree: 29ee6720d249adfe5864ab9f09d7cb54d9f35238 /Lib/re/_compiler.py
parent: b82cdd1dac9a9be52051abd90a1ce69236ac41f4 (diff)
download: cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.zip
cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.tar.gz
cpython-6e3eee5c11b539e9aab39cff783acf57838c355a.tar.bz2
bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)
Diffstat (limited to 'Lib/re/_compiler.py')
-rw-r--r-- Lib/re/_compiler.py | 61
1 file changed, 39 insertions, 22 deletions
diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index 62da8e5..bedd4b8 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -67,14 +67,21 @@ _equivalences = (
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
for t in _equivalences for i in t}
+class _CompileData:
+ __slots__ = ('code', 'repeat_count')
+ def __init__(self):
+ self.code = []
+ self.repeat_count = 0
+
def _combine_flags(flags, add_flags, del_flags,
TYPE_FLAGS=_parser.TYPE_FLAGS):
if add_flags & TYPE_FLAGS:
flags &= ~TYPE_FLAGS
return (flags | add_flags) & ~del_flags
-def _compile(code, pattern, flags):
+def _compile(data, pattern, flags):
# internal: compile a (sub)pattern
+ code = data.code
emit = code.append
_len = len
LITERAL_CODES = _LITERAL_CODES
@@ -147,7 +154,7 @@ def _compile(code, pattern, flags):
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
- _compile(code, av[2], flags)
+ _compile(data, av[2], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
else:
@@ -155,7 +162,11 @@ def _compile(code, pattern, flags):
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
- _compile(code, av[2], flags)
+ # now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
+ if op != POSSESSIVE_REPEAT:
+ emit(data.repeat_count)
+ data.repeat_count += 1
+ _compile(data, av[2], flags)
code[skip] = _len(code) - skip
emit(REPEATING_CODES[op][1])
elif op is SUBPATTERN:
@@ -164,7 +175,7 @@ def _compile(code, pattern, flags):
emit(MARK)
emit((group-1)*2)
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
- _compile(code, p, _combine_flags(flags, add_flags, del_flags))
+ _compile(data, p, _combine_flags(flags, add_flags, del_flags))
if group:
emit(MARK)
emit((group-1)*2+1)
@@ -176,7 +187,7 @@ def _compile(code, pattern, flags):
# pop their stack if they reach it
emit(ATOMIC_GROUP)
skip = _len(code); emit(0)
- _compile(code, av, flags)
+ _compile(data, av, flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op in SUCCESS_CODES:
@@ -191,13 +202,13 @@ def _compile(code, pattern, flags):
if lo != hi:
raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind
- _compile(code, av[1], flags)
+ _compile(data, av[1], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op is CALL:
emit(op)
skip = _len(code); emit(0)
- _compile(code, av, flags)
+ _compile(data, av, flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op is AT:
@@ -216,7 +227,7 @@ def _compile(code, pattern, flags):
for av in av[1]:
skip = _len(code); emit(0)
# _compile_info(code, av, flags)
- _compile(code, av, flags)
+ _compile(data, av, flags)
emit(JUMP)
tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip
@@ -244,12 +255,12 @@ def _compile(code, pattern, flags):
emit(op)
emit(av[0]-1)
skipyes = _len(code); emit(0)
- _compile(code, av[1], flags)
+ _compile(data, av[1], flags)
if av[2]:
emit(JUMP)
skipno = _len(code); emit(0)
code[skipyes] = _len(code) - skipyes + 1
- _compile(code, av[2], flags)
+ _compile(data, av[2], flags)
code[skipno] = _len(code) - skipno
else:
code[skipyes] = _len(code) - skipyes + 1
@@ -608,17 +619,17 @@ def isstring(obj):
def _code(p, flags):
flags = p.state.flags | flags
- code = []
+ data = _CompileData()
# compile info block
- _compile_info(code, p, flags)
+ _compile_info(data.code, p, flags)
# compile the pattern
- _compile(code, p.data, flags)
+ _compile(data, p.data, flags)
- code.append(SUCCESS)
+ data.code.append(SUCCESS)
- return code
+ return data
def _hex_code(code):
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -719,7 +730,7 @@ def dis(code):
else:
print_(FAILURE)
i += 1
- elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
+ elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
skip, min, max = code[i: i+3]
if max == MAXREPEAT:
@@ -727,6 +738,13 @@ def dis(code):
print_(op, skip, min, max, to=i+skip)
dis_(i+3, i+skip)
i += skip
+ elif op is REPEAT:
+ skip, min, max, repeat_index = code[i: i+4]
+ if max == MAXREPEAT:
+ max = 'MAXREPEAT'
+ print_(op, skip, min, max, repeat_index, to=i+skip)
+ dis_(i+4, i+skip)
+ i += skip
elif op is GROUPREF_EXISTS:
arg, skip = code[i: i+2]
print_(op, arg, skip, to=i+skip)
@@ -781,11 +799,11 @@ def compile(p, flags=0):
else:
pattern = None
- code = _code(p, flags)
+ data = _code(p, flags)
if flags & SRE_FLAG_DEBUG:
print()
- dis(code)
+ dis(data.code)
# map in either direction
groupindex = p.state.groupdict
@@ -794,7 +812,6 @@ def compile(p, flags=0):
indexgroup[i] = k
return _sre.compile(
- pattern, flags | p.state.flags, code,
- p.state.groups-1,
- groupindex, tuple(indexgroup)
- )
+ pattern, flags | p.state.flags, data.code,
+ p.state.groups-1, groupindex, tuple(indexgroup),
+ data.repeat_count)