summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2008-02-10 20:35:16 (GMT)
committer Raymond Hettinger <python@rcn.com> 2008-02-10 20:35:16 (GMT)
commit da614dcc4f56bfb136c53b04d60889870d969926 (patch)
tree a61612e5c30d966b64d003ffcf076b9533136f3c
parent 900b7835268b1ca0ce0034f76a269a0d8eda570b (diff)
download cpython-da614dcc4f56bfb136c53b04d60889870d969926.zip
cpython-da614dcc4f56bfb136c53b04d60889870d969926.tar.gz
cpython-da614dcc4f56bfb136c53b04d60889870d969926.tar.bz2
Complete an open todo on pickletools -- add a pickle optimizer.
-rw-r--r-- Doc/library/pickletools.rst 7
-rw-r--r-- Lib/pickletools.py 31
-rw-r--r-- Misc/NEWS 3
3 files changed, 38 insertions, 3 deletions
diff --git a/Doc/library/pickletools.rst b/Doc/library/pickletools.rst
index a19b978..b07e3bd 100644
--- a/Doc/library/pickletools.rst
+++ b/Doc/library/pickletools.rst
@@ -35,3 +35,10 @@ probably won't find the :mod:`pickletools` module relevant.
the opcode's argument; *pos* is the position at which this opcode is located.
*pickle* can be a string or a file-like object.
+.. function:: optimize(picklestring)
+
+ Returns a new equivalent pickle string after eliminating unused ``PUT``
+ opcodes. The optimized pickle is shorter, takes less transmission time,
+ requires less storage space, and unpickles more efficiently.
+
+ .. versionadded:: 2.6
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 98f80f1..ae02a36 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -10,9 +10,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
Print a symbolic disassembly of a pickle.
'''
-__all__ = ['dis',
- 'genops',
- ]
+__all__ = ['dis', 'genops', 'optimize']
# Other ideas:
#
@@ -1858,6 +1856,33 @@ def genops(pickle):
break
##############################################################################
+# A pickle optimizer.
+
+def optimize(p):
+ 'Optimize a pickle string by removing unused PUT opcodes'
+ gets = set() # set of args used by a GET opcode
+ puts = [] # (arg, startpos, stoppos) for the PUT opcodes
+ prevpos = None # set to pos if previous opcode was a PUT
+ for opcode, arg, pos in genops(p):
+ if prevpos is not None:
+ puts.append((prevarg, prevpos, pos))
+ prevpos = None
+ if 'PUT' in opcode.name:
+ prevarg, prevpos = arg, pos
+ elif 'GET' in opcode.name:
+ gets.add(arg)
+
+ # Copy the pickle string except for PUTS without a corresponding GET
+ s = []
+ i = 0
+ for arg, start, stop in puts:
+ j = stop if (arg in gets) else start
+ s.append(p[i:j])
+ i = stop
+ s.append(p[i:])
+ return ''.join(s)
+
+##############################################################################
# A symbolic pickle disassembler.
def dis(pickle, out=None, memo=None, indentlevel=4):
diff --git a/Misc/NEWS b/Misc/NEWS
index 96ea7d7..c519ba6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -400,6 +400,9 @@ Core and builtins
Library
-------
+- The pickletools module now provides an optimize() function
+ that eliminates unused PUT opcodes from a pickle string.
+
- #2021: Allow tempfile.NamedTemporaryFile and SpooledTemporaryFile
to be used in with statements by correctly supporting the context
management protocol.