summaryrefslogtreecommitdiffstats
path: root/Lib/pickle.py
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1995-01-10 00:31:14 (GMT)
committerGuido van Rossum <guido@python.org>1995-01-10 00:31:14 (GMT)
commita48061a5804418a63aac24bfce444fd555e3ffe7 (patch)
treeb79aefe89c044651dc6ac1f9981c9c7530c0e666 /Lib/pickle.py
parent8a30adc33ffe494f464f1a30c42f28fb3088970a (diff)
downloadcpython-a48061a5804418a63aac24bfce444fd555e3ffe7.zip
cpython-a48061a5804418a63aac24bfce444fd555e3ffe7.tar.gz
cpython-a48061a5804418a63aac24bfce444fd555e3ffe7.tar.bz2
shelve.py: database of persistent objects, on top of pickle.py and anydbm.py
pickle.py: new low-level persistency module (used to be called flatten); dbmac.py: stupid dbm clone for the Mac; anydbm.py: generic dbm interface (should be extended to support gdbm)
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r--Lib/pickle.py504
1 files changed, 504 insertions, 0 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
new file mode 100644
index 0000000..b5ade57
--- /dev/null
+++ b/Lib/pickle.py
@@ -0,0 +1,504 @@
+"""\
+Pickling Algorithm
+------------------
+
+This module implements a basic but powerful algorithm for "pickling" (a.k.a.
+serializing, marshalling or flattening) nearly arbitrary Python objects.
+This is a more primitive notion than persistency -- although pickle
+reads and writes file objects, it does not handle the issue of naming
+persistent objects, nor the (even more complicated) area of concurrent
+access to persistent objects. The pickle module can transform a complex
+object into a byte stream and it can transform the byte stream into
+an object with the same internal structure. The most obvious thing to
+do with these byte streams is to write them onto a file, but it is also
+conceivable to send them across a network or store them in a database.
+
+Unlike the built-in marshal module, pickle handles the following correctly:
+
+- recursive objects
+- pointer sharing
+- class instances
+
+Pickle is Python-specific. This has the advantage that there are no
+restrictions imposed by external standards such as CORBA (which probably
+can't represent pointer sharing or recursive objects); however it means
+that non-Python programs may not be able to reconstruct pickled Python
+objects.
+
+Pickle uses a printable ASCII representation. This is slightly more
+voluminous than a binary representation. However, small integers actually
+take *less* space when represented as minimal-size decimal strings than
+when represented as 32-bit binary numbers, and strings are only much longer
+if they contain control characters or 8-bit characters. The big advantage
+of using printable ASCII (and of some other characteristics of pickle's
+representation) is that for debugging or recovery purposes it is possible
+for a human to read the pickled file with a standard text editor. (I could
+have gone a step further and used a notation like S-expressions, but the
+parser would have been considerably more complicated and slower, and the
+files would probably have become much larger.)
+
+Pickle doesn't handle code objects, which marshal does.
+I suppose pickle could, and maybe it should, but there's probably no
+great need for it right now (as long as marshal continues to be used
+for reading and writing code objects), and at least this avoids
+the possibility of smuggling Trojan horses into a program.
+
+For the benefit of persistency modules written using pickle, it supports
+the notion of a reference to an object outside the pickled data stream.
+Such objects are referenced by a name, which is an arbitrary string of
+printable ASCII characters. The resolution of such names is not defined
+by the pickle module -- the persistent object module will have to implement
+a method "persistent_load". To write references to persistent objects,
+the persistent module must define a method "persistent_id" which returns
+either None or the persistent ID of the object.
+
+There are some restrictions on the pickling of class instances.
+
+First of all, the class must be defined at the top level in a module.
+
+Next, it must normally be possible to create class instances by calling
+the class without arguments. If this is undesirable, the class can
+define a method __getinitargs__ (XXX not a pretty name!), which should
+return a *tuple* containing the arguments to be passed to the class
+constructor.
+
+Classes can influence how they are pickled -- if the class defines
+the method __getstate__, it is called and the return state is pickled
+as the contents for the instance, and if the class defines the
+method __setstate__, it is called with the unpickled state. (Note
+that these methods can also be used to implement copying class instances.)
+If there is no __getstate__ method, the instance's __dict__
+is pickled. If there is no __setstate__ method, the pickled object
+must be a dictionary and its items are assigned to the new instance's
+dictionary. (If a class defines both __getstate__ and __setstate__,
+the state object needn't be a dictionary -- these methods can do what they
+want.)
+
+Note that when class instances are pickled, their class's code and data
+are not pickled along with them. Only the instance data is pickled.
+This is done on purpose, so you can fix bugs in a class or add methods and
+still load objects that were created with an earlier version of the
+class. If you plan to have long-lived objects that will see many versions
+of a class, it may be worthwhile to put a version number in the objects so
+that suitable conversions can be made by the class's __setstate__ method.
+
+The interface is as follows:
+
+To pickle an object x onto a file f, open for writing:
+
+ p = pickle.Pickler(f)
+ p.dump(x)
+
+To unpickle an object x from a file f, open for reading:
+
+ u = pickle.Unpickler(f)
+ x = u.load()
+
+The Pickler class only calls the method f.write with a string argument
+(XXX possibly the interface should pass f.write instead of f).
+The Unpickler calls the methods f.read (with an integer argument)
+and f.readline (without argument), both returning a string.
+It is explicitly allowed to pass non-file objects here, as long as they
+have the right methods.
+
+The following types can be pickled:
+
+- None
+- integers, long integers, floating point numbers
+- strings
+- tuples, lists and dictionaries containing picklable objects
+- class instances whose __dict__ or __getstate__() result is picklable
+
+Attempts to pickle unpicklable objects will raise an exception
+after having written an unspecified number of bytes to the file argument.
+
+It is possible to make multiple calls to Pickler.dump() or to
+Unpickler.load(), as long as there is a one-to-one correspondence
+between Pickler and Unpickler objects and between dump and load calls
+for any pair of corresponding Pickler and Unpicklers. WARNING: this
+is intended for pickling multiple objects without intervening modifications
+to the objects or their parts. If you modify an object and then pickle
+it again using the same Pickler instance, the object is not pickled
+again -- a reference to it is pickled and the Unpickler will return
+the old value, not the modified one. (XXX There are two problems here:
+(a) detecting changes, and (b) marshalling a minimal set of changes.
+I have no answers. Garbage Collection may also become a problem here.)
+"""
+
+__format_version__ = "1.0" # File format version
+__version__ = "1.2" # Code version
+
+from types import *
+import string
+
+AtomicTypes = [NoneType, IntType, FloatType, StringType]
+
+def safe(object):
+ t = type(object)
+ if t in AtomicTypes:
+ return 1
+ if t is TupleType:
+ for item in object:
+ if not safe(item): return 0
+ return 1
+ return 0
+
+# One-character opcodes of the pickle stream.  The meanings given here
+# are those implemented by the Unpickler.load_* handlers in this file.
+# "Rest of line" means the text up to (not including) the next newline.
+MARK = '(' # push the marker object onto the stack
+POP = '0' # discard the top stack item
+DUP = '2' # push the top stack item a second time
+STOP = '.' # end of pickle; the top stack item is the result
+TUPLE = 't' # replace the marker and the items above it by a tuple
+LIST = 'l' # replace the marker and the items above it by a list
+DICT = 'd' # replace the marker and the key, value pairs above it by a dict
+INST = 'i' # two lines follow (module, class name); items above the marker are the constructor arguments
+GET = 'g' # push the memo entry whose integer key is the rest of line
+PUT = 'p' # store the top stack item in the memo under the integer key on the rest of line
+APPEND = 'a' # pop a value and append it to the list now on top
+SETITEM = 's' # pop a key and a value and store them in the dict now on top
+BUILD = 'b' # pop a state and apply it to the instance now on top
+NONE = 'N' # push None
+INT = 'I' # push the value of the Python expression on the rest of line
+LONG = 'L' # same as INT; written for long integers
+FLOAT = 'F' # same as INT; written for floating point numbers
+STRING = 'S' # same as INT; written for strings
+PERSID = 'P' # push the object for the persistent ID on the rest of line
+# Opcodes of the atomic values.  Not referenced by the rest of this file.
+AtomicKeys = [NONE, INT, LONG, FLOAT, STRING]
+# Type -> opcode for the atomic values.  Not referenced by the rest of
+# this file; the Pickler uses its own dispatch table.
+AtomicMap = {
+ NoneType: NONE,
+ IntType: INT,
+ LongType: LONG,
+ FloatType: FLOAT,
+ StringType: STRING,
+}
+
+class Pickler:
+
+ def __init__(self, file):
+ self.write = file.write
+ self.memo = {}
+
+ def dump(self, object):
+ self.save(object)
+ self.write(STOP)
+
+ def save(self, object):
+ pid = self.persistent_id(object)
+ if pid:
+ self.write(PERSID + str(pid) + '\n')
+ return
+ d = id(object)
+ if self.memo.has_key(d):
+ self.write(GET + `d` + '\n')
+ return
+ t = type(object)
+ self.dispatch[t](self, object)
+
+ def persistent_id(self, object):
+ return None
+
+ dispatch = {}
+
+ def save_none(self, object):
+ self.write(NONE)
+ dispatch[NoneType] = save_none
+
+ def save_int(self, object):
+ self.write(INT + `object` + '\n')
+ dispatch[IntType] = save_int
+
+ def save_long(self, object):
+ self.write(LONG + `object` + '\n')
+ dispatch[LongType] = save_long
+
+ def save_float(self, object):
+ self.write(FLOAT + `object` + '\n')
+ dispatch[FloatType] = save_float
+
+ def save_string(self, object):
+ d = id(object)
+ self.write(STRING + `object` + '\n')
+ self.write(PUT + `d` + '\n')
+ self.memo[d] = object
+ dispatch[StringType] = save_string
+
+ def save_tuple(self, object):
+ d = id(object)
+ self.write(MARK)
+ n = len(object)
+ for k in range(n):
+ self.save(object[k])
+ if self.memo.has_key(d):
+ # Saving object[k] has saved us!
+ while k >= 0:
+ self.write(POP)
+ k = k-1
+ self.write(GET + `d` + '\n')
+ break
+ else:
+ self.write(TUPLE + PUT + `d` + '\n')
+ self.memo[d] = object
+ dispatch[TupleType] = save_tuple
+
+ def save_list(self, object):
+ d = id(object)
+ self.write(MARK)
+ n = len(object)
+ for k in range(n):
+ item = object[k]
+ if not safe(item):
+ break
+ self.save(item)
+ else:
+ k = n
+ self.write(LIST + PUT + `d` + '\n')
+ self.memo[d] = object
+ for k in range(k, n):
+ item = object[k]
+ self.save(item)
+ self.write(APPEND)
+ dispatch[ListType] = save_list
+
+ def save_dict(self, object):
+ d = id(object)
+ self.write(MARK)
+ items = object.items()
+ n = len(items)
+ for k in range(n):
+ key, value = items[k]
+ if not safe(key) or not safe(value):
+ break
+ self.save(key)
+ self.save(value)
+ else:
+ k = n
+ self.write(DICT + PUT + `d` + '\n')
+ self.memo[d] = object
+ for k in range(k, n):
+ key, value = items[k]
+ self.save(key)
+ self.save(value)
+ self.write(SETITEM)
+ dispatch[DictionaryType] = save_dict
+
+ def save_inst(self, object):
+ d = id(object)
+ cls = object.__class__
+ module = whichmodule(cls)
+ name = cls.__name__
+ if hasattr(object, '__getinitargs__'):
+ args = object.__getinitargs__()
+ len(args) # XXX Assert it's a sequence
+ else:
+ args = ()
+ self.write(MARK)
+ for arg in args:
+ self.save(arg)
+ self.write(INST + module + '\n' + name + '\n' +
+ PUT + `d` + '\n')
+ self.memo[d] = object
+ try:
+ getstate = object.__getstate__
+ except AttributeError:
+ stuff = object.__dict__
+ else:
+ stuff = getstate()
+ self.save(stuff)
+ self.write(BUILD)
+ dispatch[InstanceType] = save_inst
+
+
+classmap = {}
+
+def whichmodule(cls):
+ """Figure out the module in which a class occurs.
+
+ Search sys.modules for the module.
+ Cache in classmap.
+ Return a module name.
+ If the class cannot be found, return __main__.
+ """
+ if classmap.has_key(cls):
+ return classmap[cls]
+ import sys
+ clsname = cls.__name__
+ for name, module in sys.modules.items():
+ if module.__name__ != '__main__' and \
+ hasattr(module, clsname) and \
+ getattr(module, clsname) is cls:
+ break
+ else:
+ name = '__main__'
+ classmap[cls] = name
+ return name
+
+
+class Unpickler:
+
+ def __init__(self, file):
+ self.readline = file.readline
+ self.read = file.read
+ self.memo = {}
+
+ def load(self):
+ self.mark = ['spam'] # Any new unique object
+ self.stack = []
+ try:
+ while 1:
+ key = self.read(1)
+ self.dispatch[key](self)
+ except STOP, value:
+ return value
+
+ def marker(self):
+ k = len(self.stack)-1
+ while self.stack[k] != self.mark: k = k-1
+ return k
+
+ dispatch = {}
+
+ def load_persid(self):
+ pid = self.readline()[:-1]
+ self.stack.append(self.persisent_load(pid))
+ dispatch[PERSID] = load_persid
+
+ def load_none(self):
+ self.stack.append(None)
+ dispatch[NONE] = load_none
+
+ def load_atomic(self):
+ self.stack.append(eval(self.readline()[:-1]))
+ dispatch[INT] = load_atomic
+ dispatch[LONG] = load_atomic
+ dispatch[FLOAT] = load_atomic
+ dispatch[STRING] = load_atomic
+
+ def load_tuple(self):
+ k = self.marker()
+ self.stack[k:] = [tuple(self.stack[k+1:])]
+ dispatch[TUPLE] = load_tuple
+
+ def load_list(self):
+ k = self.marker()
+ self.stack[k:] = [self.stack[k+1:]]
+ dispatch[LIST] = load_list
+
+ def load_dict(self):
+ k = self.marker()
+ d = {}
+ items = self.stack[k+1:]
+ for i in range(0, len(items), 2):
+ key = items[i]
+ value = items[i+1]
+ d[key] = value
+ self.stack[k:] = [d]
+ dispatch[DICT] = load_dict
+
+ def load_inst(self):
+ k = self.marker()
+ args = tuple(self.stack[k+1:])
+ del self.stack[k:]
+ module = self.readline()[:-1]
+ name = self.readline()[:-1]
+ env = {}
+ try:
+ exec 'from %s import %s' % (module, name) in env
+ except ImportError:
+ raise SystemError, \
+ "Failed to import class %s from module %s" % \
+ (name, module)
+ else:
+ klass = env[name]
+ if type(klass) != ClassType:
+ raise SystemError, \
+ "imported object %s from module %s is not a class" % \
+ (name, module)
+ value = apply(klass, args)
+ self.stack.append(value)
+ dispatch[INST] = load_inst
+
+ def load_pop(self):
+ del self.stack[-1]
+ dispatch[POP] = load_pop
+
+ def load_dup(self):
+ stack.append(stack[-1])
+ dispatch[DUP] = load_dup
+
+ def load_get(self):
+ self.stack.append(self.memo[string.atoi(self.readline()[:-1])])
+ dispatch[GET] = load_get
+
+ def load_put(self):
+ self.memo[string.atoi(self.readline()[:-1])] = self.stack[-1]
+ dispatch[PUT] = load_put
+
+ def load_append(self):
+ value = self.stack[-1]
+ del self.stack[-1]
+ list = self.stack[-1]
+ list.append(value)
+ dispatch[APPEND] = load_append
+
+ def load_setitem(self):
+ value = self.stack[-1]
+ key = self.stack[-2]
+ del self.stack[-2:]
+ dict = self.stack[-1]
+ dict[key] = value
+ dispatch[SETITEM] = load_setitem
+
+ def load_build(self):
+ value = self.stack[-1]
+ del self.stack[-1]
+ inst = self.stack[-1]
+ try:
+ setstate = inst.__setstate__
+ except AttributeError:
+ for key in value.keys():
+ inst.__dict__[key] = value[key]
+ else:
+ setstate(value)
+ dispatch[BUILD] = load_build
+
+ def load_mark(self):
+ self.stack.append(self.mark)
+ dispatch[MARK] = load_mark
+
+ def load_stop(self):
+ value = self.stack[-1]
+ del self.stack[-1]
+ raise STOP, value
+ dispatch[STOP] = load_stop
+
+
+class C:
+ def __cmp__(self, other):
+ return cmp(self.__dict__, other.__dict__)
+
+def test():
+ fn = 'pickle_tmp'
+ c = C()
+ c.foo = 1
+ c.bar = 2
+ x = [0,1,2,3]
+ y = ('abc', 'abc', c, c)
+ x.append(y)
+ x.append(y)
+ x.append(5)
+ f = open(fn, 'w')
+ F = Pickler(f)
+ F.dump(x)
+ f.close()
+ f = open(fn, 'r')
+ U = Unpickler(f)
+ x2 = U.load()
+ print x
+ print x2
+ print x == x2
+ print map(id, x)
+ print map(id, x2)
+ print F.memo
+ print U.memo
+
+if __name__ == '__main__':
+ test()