diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-12-16 16:00:56 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-12-16 16:00:56 (GMT) |
commit | 05dadcfb28b815c9558fe2a6a74cd3ce7df62577 (patch) | |
tree | 16b06d95b5343b02e758d64d8b7ad13ca57fd145 /Lib/pickletools.py | |
parent | df9386940a0104ba4dce1beee38848bc44a17036 (diff) | |
download | cpython-05dadcfb28b815c9558fe2a6a74cd3ce7df62577.zip cpython-05dadcfb28b815c9558fe2a6a74cd3ce7df62577.tar.gz cpython-05dadcfb28b815c9558fe2a6a74cd3ce7df62577.tar.bz2 |
Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
produce more compact result and no longer produces invalid output if input
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
Diffstat (limited to 'Lib/pickletools.py')
-rw-r--r-- | Lib/pickletools.py | 69 |
1 files changed, 45 insertions, 24 deletions
diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 71c2aa1..6b86723 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2282,40 +2282,61 @@ def genops(pickle): def optimize(p): 'Optimize a pickle string by removing unused PUT opcodes' - not_a_put = object() - gets = { not_a_put } # set of args used by a GET opcode - opcodes = [] # (startpos, stoppos, putid) + put = 'PUT' + get = 'GET' + oldids = set() # set of all PUT ids + newids = {} # set of ids used by a GET opcode + opcodes = [] # (op, idx) or (pos, end_pos) proto = 0 + protoheader = b'' for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True): if 'PUT' in opcode.name: - opcodes.append((pos, end_pos, arg)) + oldids.add(arg) + opcodes.append((put, arg)) + elif opcode.name == 'MEMOIZE': + idx = len(oldids) + oldids.add(idx) + opcodes.append((put, idx)) elif 'FRAME' in opcode.name: pass - else: - if 'GET' in opcode.name: - gets.add(arg) - elif opcode.name == 'PROTO': - assert pos == 0, pos + elif 'GET' in opcode.name: + if opcode.proto > proto: + proto = opcode.proto + newids[arg] = None + opcodes.append((get, arg)) + elif opcode.name == 'PROTO': + if arg > proto: proto = arg - opcodes.append((pos, end_pos, not_a_put)) - prevpos, prevarg = pos, None + if pos == 0: + protoheader = p[pos: end_pos] + else: + opcodes.append((pos, end_pos)) + else: + opcodes.append((pos, end_pos)) + del oldids # Copy the opcodes except for PUTS without a corresponding GET out = io.BytesIO() - opcodes = iter(opcodes) - if proto >= 2: - # Write the PROTO header before any framing - start, stop, _ = next(opcodes) - out.write(p[start:stop]) - buf = pickle._Framer(out.write) + # Write the PROTO header before any framing + out.write(protoheader) + pickler = pickle._Pickler(out, proto) if proto >= 4: - buf.start_framing() - for start, stop, putid in opcodes: - if putid in gets: - buf.commit_frame() - buf.write(p[start:stop]) - if proto >= 4: - buf.end_framing() + pickler.framer.start_framing() + idx = 0 + for op, arg in opcodes: + if op is put: + if arg not in newids: + continue + data = pickler.put(idx) + newids[arg] = idx + idx += 1 + elif op is get: + data = pickler.get(newids[arg]) + else: + data = p[op:arg] + pickler.framer.commit_frame() + pickler.write(data) + pickler.framer.end_framing() return out.getvalue() ############################################################################## |