diff options
author | Raymond Hettinger <python@rcn.com> | 2008-02-10 20:35:16 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2008-02-10 20:35:16 (GMT) |
commit | da614dcc4f56bfb136c53b04d60889870d969926 (patch) | |
tree | a61612e5c30d966b64d003ffcf076b9533136f3c | |
parent | 900b7835268b1ca0ce0034f76a269a0d8eda570b (diff) | |
download | cpython-da614dcc4f56bfb136c53b04d60889870d969926.zip cpython-da614dcc4f56bfb136c53b04d60889870d969926.tar.gz cpython-da614dcc4f56bfb136c53b04d60889870d969926.tar.bz2 |
Complete an open todo on pickletools -- add a pickle optimizer.
-rw-r--r-- | Doc/library/pickletools.rst | 7 | ||||
-rw-r--r-- | Lib/pickletools.py | 31 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 38 insertions, 3 deletions
```diff
diff --git a/Doc/library/pickletools.rst b/Doc/library/pickletools.rst
index a19b978..b07e3bd 100644
--- a/Doc/library/pickletools.rst
+++ b/Doc/library/pickletools.rst
@@ -35,3 +35,10 @@ probably won't find the :mod:`pickletools` module relevant.
    the opcode's argument; *pos* is the position at which this opcode is located.
    *pickle* can be a string or a file-like object.
 
+.. function:: optimize(picklestring)
+
+   Returns a new equivalent pickle string after eliminating unused ``PUT``
+   opcodes. The optimized pickle is shorter, takes less transmission time,
+   requires less storage space, and unpickles more efficiently.
+
+   .. versionadded:: 2.6
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 98f80f1..ae02a36 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -10,9 +10,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
    Print a symbolic disassembly of a pickle.
 '''
 
-__all__ = ['dis',
-           'genops',
-          ]
+__all__ = ['dis', 'genops', 'optimize']
 
 # Other ideas:
 #
@@ -1858,6 +1856,33 @@ def genops(pickle):
             break
 
 ##############################################################################
+# A pickle optimizer.
+
+def optimize(p):
+    'Optimize a pickle string by removing unused PUT opcodes'
+    gets = set()            # set of args used by a GET opcode
+    puts = []               # (arg, startpos, stoppos) for the PUT opcodes
+    prevpos = None          # set to pos if previous opcode was a PUT
+    for opcode, arg, pos in genops(p):
+        if prevpos is not None:
+            puts.append((prevarg, prevpos, pos))
+            prevpos = None
+        if 'PUT' in opcode.name:
+            prevarg, prevpos = arg, pos
+        elif 'GET' in opcode.name:
+            gets.add(arg)
+
+    # Copy the pickle string except for PUTS without a corresponding GET
+    s = []
+    i = 0
+    for arg, start, stop in puts:
+        j = stop if (arg in gets) else start
+        s.append(p[i:j])
+        i = stop
+    s.append(p[i:])
+    return ''.join(s)
+
+##############################################################################
 # A symbolic pickle disassembler.
 
 def dis(pickle, out=None, memo=None, indentlevel=4):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -400,6 +400,9 @@ Core and builtins
 Library
 -------
 
+- The pickletools module now provides an optimize() function
+  that eliminates unused PUT opcodes from a pickle string.
+
 - #2021: Allow tempfile.NamedTemporaryFile and SpooledTemporaryFile to be used in
   with statements by correctly supporting the context management protocol.
 
```