From 5f3b63ad6f85bfe162a637d2b7fd8f3a3245b0e2 Mon Sep 17 00:00:00 2001 From: Alexandre Vassalotti Date: Sat, 18 Oct 2008 20:47:58 +0000 Subject: Improve pickle's documentation. Use double-space for ending a sentence. Add dbpickle.py example. Improve description about persistent IDs. --- Doc/includes/dbpickle.py | 88 +++++++++++++++++++++++++++++++ Doc/library/pickle.rst | 131 +++++++++++++++++------------------------------ 2 files changed, 135 insertions(+), 84 deletions(-) create mode 100644 Doc/includes/dbpickle.py diff --git a/Doc/includes/dbpickle.py b/Doc/includes/dbpickle.py new file mode 100644 index 0000000..d2eee6c --- /dev/null +++ b/Doc/includes/dbpickle.py @@ -0,0 +1,88 @@ +# Simple example presenting how persistent IDs can be used to pickle +# external objects by reference. + +import pickle +import sqlite3 +from collections import namedtuple + +# Simple class representing a record in our database. +MemoRecord = namedtuple("MemoRecord", "key, task") + +class DBPickler(pickle.Pickler): + + def persistent_id(self, obj): + # Instead of pickling MemoRecord as a regular class instance, we emit a + # persistent ID. + if isinstance(obj, MemoRecord): + # Here, our persistent ID is simply a tuple containing a tag and a + # key which refers to a specific record in the database. + return ("MemoRecord", obj.key) + else: + # If obj does not have a persistent ID, return None. This means obj + # needs to be pickled as usual. + return None + + +class DBUnpickler(pickle.Unpickler): + + def __init__(self, file, connection): + super().__init__(file) + self.connection = connection + + def persistent_load(self, pid): + # This method is invoked whenever a persistent ID is encountered. + # Here, pid is the tuple returned by DBPickler. + cursor = self.connection.cursor() + type_tag, key_id = pid + if type_tag == "MemoRecord": + # Fetch the referenced record from the database and return it. 
+ cursor.execute("SELECT * FROM memos WHERE key=?", (str(key_id),)) + key, task = cursor.fetchone() + return MemoRecord(key, task) + else: + # Always raise an error if you cannot return the correct object. + # Otherwise, the unpickler will think None is the object referenced + # by the persistent ID. + raise pickle.UnpicklingError("unsupported persistent object") + + +def main(verbose=True): + import io, pprint + + # Initialize and populate our database. + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + cursor.execute("CREATE TABLE memos(key INTEGER PRIMARY KEY, task TEXT)") + tasks = ( + 'give food to fish', + 'prepare group meeting', + 'fight with a zebra', + ) + for task in tasks: + cursor.execute("INSERT INTO memos VALUES(NULL, ?)", (task,)) + + # Fetch the records to be pickled. + cursor.execute("SELECT * FROM memos") + memos = [MemoRecord(key, task) for key, task in cursor] + # Save the records using our custom DBPickler. + file = io.BytesIO() + DBPickler(file).dump(memos) + + if verbose: + print("Records to be pickled:") + pprint.pprint(memos) + + # Update a record, just for good measure. + cursor.execute("UPDATE memos SET task='learn italian' WHERE key=1") + + # Load the records from the pickle data stream. + file.seek(0) + memos = DBUnpickler(file, conn).load() + + if verbose: + print("Unpickled records:") + pprint.pprint(memos) + + +if __name__ == '__main__': + main() diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst index 4aab5f5..aaee314 100644 --- a/Doc/library/pickle.rst +++ b/Doc/library/pickle.rst @@ -27,7 +27,7 @@ Relationship to other Python modules ------------------------------------ The :mod:`pickle` module has an transparent optimizer (:mod:`_pickle`) written -in C. It is used whenever available. Otherwise the pure Python implementation is +in C. It is used whenever available. Otherwise the pure Python implementation is used. 
Python has a more primitive serialization module called :mod:`marshal`, but in @@ -108,7 +108,7 @@ There are currently 4 different protocols which can be used for pickling. efficient pickling of :term:`new-style class`\es. * Protocol version 3 was added in Python 3.0. It has explicit support for - bytes and cannot be unpickled by Python 2.x pickle modules. This is + bytes and cannot be unpickled by Python 2.x pickle modules. This is the current recommended protocol, use it whenever it is possible. Refer to :pep:`307` for more information. @@ -166,7 +166,7 @@ process more convenient: Python needed to read the pickle produced. The *file* argument must have a write() method that accepts a single bytes - argument. It can thus be a file object opened for binary writing, a + argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. .. function:: dumps(obj[, protocol]) @@ -220,7 +220,7 @@ The :mod:`pickle` module defines three exceptions: .. exception:: PickleError - Common base class for the other pickling exceptions. It inherits + Common base class for the other pickling exceptions. It inherits :exc:`Exception`. .. exception:: PicklingError @@ -228,10 +228,13 @@ The :mod:`pickle` module defines three exceptions: Error raised when an unpicklable object is encountered by :class:`Pickler`. It inherits :exc:`PickleError`. + Refer to :ref:`pickle-picklable` to learn what kinds of objects can be + pickled. + .. exception:: UnpicklingError Error raised when there a problem unpickling an object, such as a data - corruption or a security violation. It inherits :exc:`PickleError`. + corruption or a security violation. It inherits :exc:`PickleError`. 
Note that other exceptions may also be raised during unpickling, including (but not necessarily limited to) AttributeError, EOFError, ImportError, and @@ -254,7 +257,7 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and Python needed to read the pickle produced. The *file* argument must have a write() method that accepts a single bytes - argument. It can thus be a file object opened for binary writing, a + argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. .. method:: dump(obj) @@ -276,8 +279,8 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and .. method:: clear_memo() - Deprecated. Use the :meth:`clear` method on the :attr:`memo`. Clear the - pickler's memo, useful when reusing picklers. + Deprecated. Use the :meth:`clear` method on :attr:`memo` instead. + Clear the pickler's memo, useful when reusing picklers. .. attribute:: fast @@ -329,24 +332,28 @@ return the old value, not the modified one. Read a pickled object representation from the open file object given in the constructor, and return the reconstituted object hierarchy specified - therein. Bytes past the pickled object's representation are ignored. + therein. Bytes past the pickled object's representation are ignored. .. method:: persistent_load(pid) Raise an :exc:`UnpickingError` by default. If defined, :meth:`persistent_load` should return the object specified by - the persistent ID *pid*. On errors, such as if an invalid persistent ID is - encountered, an :exc:`UnpickingError` should be raised. + the persistent ID *pid*. If an invalid persistent ID is encountered, an + :exc:`UnpicklingError` should be raised. See :ref:`pickle-persistent` for details and examples of uses. .. method:: find_class(module, name) - Import *module* if necessary and return the object called *name* from it. 
- Subclasses may override this to gain control over what type of objects can - be loaded, potentially reducing security risks. + Import *module* if necessary and return the object called *name* from it, + where the *module* and *name* arguments are :class:`str` objects. + + Subclasses may override this to gain control over what type of objects and + how they can be loaded, potentially reducing security risks. + +.. _pickle-picklable: What can be pickled and unpickled? ---------------------------------- @@ -372,9 +379,9 @@ The following types can be pickled: Attempts to pickle unpicklable objects will raise the :exc:`PicklingError` exception; when this happens, an unspecified number of bytes may have already -been written to the underlying file. Trying to pickle a highly recursive data +been written to the underlying file. Trying to pickle a highly recursive data structure may exceed the maximum recursion depth, a :exc:`RuntimeError` will be -raised in this case. You can carefully raise this limit with +raised in this case. You can carefully raise this limit with :func:`sys.setrecursionlimit`. Note that functions (built-in and user-defined) are pickled by "fully qualified" @@ -390,7 +397,7 @@ pickled, so in the following example the class attribute ``attr`` is not restored in the unpickling environment:: class Foo: - attr = 'a class attr' + attr = 'A class attribute' picklestring = pickle.dumps(Foo) @@ -571,79 +578,30 @@ Pickling and unpickling external objects For the benefit of object persistence, the :mod:`pickle` module supports the notion of a reference to an object outside the pickled data stream. Such -objects are referenced by a "persistent id", which is just an arbitrary string -of printable ASCII characters. The resolution of such names is not defined by -the :mod:`pickle` module; it will delegate this resolution to user defined -functions on the pickler and unpickler. 
+objects are referenced by a persistent ID, which should be either a string of +alphanumeric characters (for protocol 0) [#]_ or just an arbitrary object (for +any newer protocol). -To define external persistent id resolution, you need to set the -:attr:`persistent_id` attribute of the pickler object and the -:attr:`persistent_load` attribute of the unpickler object. +The resolution of such persistent IDs is not defined by the :mod:`pickle` +module; it will delegate this resolution to the user-defined methods on the +pickler and unpickler, :meth:`persistent_id` and :meth:`persistent_load` +respectively. To pickle objects that have an external persistent id, the pickler must have a -custom :func:`persistent_id` method that takes an object as an argument and +custom :meth:`persistent_id` method that takes an object as an argument and returns either ``None`` or the persistent id for that object. When ``None`` is -returned, the pickler simply pickles the object as normal. When a persistent id -string is returned, the pickler will pickle that string, along with a marker so -that the unpickler will recognize the string as a persistent id. +returned, the pickler simply pickles the object as normal. When a persistent ID +is returned, the pickler will pickle that object, along with a marker so +that the unpickler will recognize it as a persistent ID. To unpickle external objects, the unpickler must have a custom -:func:`persistent_load` function that takes a persistent id string and returns -the referenced object. - -Here's a silly example that *might* shed more light:: - - import pickle - from io import StringIO - - src = StringIO() - p = pickle.Pickler(src) - - def persistent_id(obj): - if hasattr(obj, 'x'): - return 'the value %d' % obj.x - else: - return None - - p.persistent_id = persistent_id +:meth:`persistent_load` method that takes a persistent ID object and returns the +referenced object. 
- class Integer: - def __init__(self, x): - self.x = x - def __str__(self): - return 'My name is integer %d' % self.x +Example: - i = Integer(7) - print(i) - p.dump(i) - - datastream = src.getvalue() - print(repr(datastream)) - dst = StringIO(datastream) - - up = pickle.Unpickler(dst) - - class FancyInteger(Integer): - def __str__(self): - return 'I am the integer %d' % self.x - - def persistent_load(persid): - if persid.startswith('the value '): - value = int(persid.split()[2]) - return FancyInteger(value) - else: - raise pickle.UnpicklingError('Invalid persistent id') - - up.persistent_load = persistent_load - - j = up.load() - print(j) - - -.. BAW: pickle supports something called inst_persistent_id() - which appears to give unknown types a second shot at producing a persistent - id. Since Jim Fulton can't remember why it was added or what it's for, I'm - leaving it undocumented. +.. highlightlang:: python +.. literalinclude:: ../includes/dbpickle.py .. _pickle-sub: @@ -808,5 +766,10 @@ the same process or a new process. :: .. [#] These methods can also be used to implement copying class instances. -.. [#] This protocol is also used by the shallow and deep copying operations defined in - the :mod:`copy` module. +.. [#] This protocol is also used by the shallow and deep copying operations + defined in the :mod:`copy` module. + +.. [#] The limitation on alphanumeric characters is due to the fact that + persistent IDs, in protocol 0, are delimited by the newline character. + Therefore, if any kind of newline character, such as \r or \n, occurs in + persistent IDs, the resulting pickle will become unreadable. -- cgit v0.12