% Doc/lib/libshelve.tex (blob 9e9f7fe6c45b72574e9b56d1d1030a3f52ae3cdc)
\section{\module{shelve} ---
         Python object persistence}

\declaremodule{standard}{shelve}
\modulesynopsis{Python object persistence.}


A ``shelf'' is a persistent, dictionary-like object.  The difference
with ``dbm'' databases is that the values (not the keys!) in a shelf
can be essentially arbitrary Python objects --- anything that the
\refmodule{pickle} module can handle.  This includes most class
instances, recursive data types, and objects containing lots of shared 
sub-objects.  The keys are ordinary strings.
\refstmodindex{pickle}

\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,protocol=\code{None}\optional{,writeback=\code{False}}}}}
Open a persistent dictionary.  The filename specified is the base filename
for the underlying database.  As a side-effect, an extension may be added to
the filename and more than one file may be created.  By default, the
underlying database file is opened for reading and writing.  The optional
{}\var{flag} parameter has the same interpretation as the \var{flag}
parameter of \function{anydbm.open}.  

By default, version 0 pickles are used to serialize values. 
The version of the pickle protocol can be specified with the
\var{protocol} parameter. \versionchanged[The \var{protocol}
parameter was added]{2.3}

By default, modifications to mutable entries of a persistent dictionary
are not automatically written back.  If the optional \var{writeback}
parameter is set to \code{True}, all entries accessed are cached in memory
and written back at close time; this can make it handier to mutate mutable
entries in the persistent dictionary, but, if many entries are
accessed, it can consume vast amounts of memory for the cache, and it
can make the close operation very slow since all accessed entries are
written back (there is no way to determine which accessed entries are
mutable, nor which ones were actually mutated).

\end{funcdesc}

Shelf objects support all methods supported by dictionaries.  This eases
the transition from dictionary-based scripts to those requiring persistent
storage.

\subsection{Restrictions}

\begin{itemize}

\item
The choice of which database package will be used
(such as \refmodule{dbm}, \refmodule{gdbm} or \refmodule{bsddb}) depends on
which interface is available.  Therefore it is not safe to open the database
directly using \refmodule{dbm}.  The database is also (unfortunately) subject
to the limitations of \refmodule{dbm}, if it is used --- this means
that (the pickled representation of) the objects stored in the
database should be fairly small, and in rare cases key collisions may
cause the database to refuse updates.
\refbimodindex{dbm}
\refbimodindex{gdbm}
\refbimodindex{bsddb}

\item
Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.  The \method{__del__}
method of the \class{Shelf} class calls the \method{close} method, so the
programmer generally need not do this explicitly.

\item
The \module{shelve} module does not support \emph{concurrent} read/write
access to shelved objects.  (Multiple simultaneous read accesses are
safe.)  When a program has a shelf open for writing, no other program
should have it open for reading or writing.  \UNIX{} file locking can
be used to solve this, but this differs across \UNIX{} versions and
requires knowledge about the database implementation used.

\end{itemize}

\begin{classdesc}{Shelf}{dict\optional{, protocol=None\optional{, writeback=False}}}
A subclass of \class{UserDict.DictMixin} which stores pickled values in the
\var{dict} object.  

By default, version 0 pickles are used to serialize values.  The
version of the pickle protocol can be specified with the
\var{protocol} parameter. See the \module{pickle} documentation for a
discussion of the pickle protocols. \versionchanged[The \var{protocol}
parameter was added]{2.3}

If the \var{writeback} parameter is \code{True}, the object will hold a
cache of all entries accessed and write them back to the \var{dict} at
sync and close times.  This allows natural operations on mutable entries,
but can consume much more memory and make sync and close take a long time.
\end{classdesc}

\begin{classdesc}{BsdDbShelf}{dict\optional{, protocol=None\optional{, writeback=False}}}

A subclass of \class{Shelf} which exposes \method{first},
\method{next}, \method{previous}, \method{last} and
\method{set_location} which are available in the \module{bsddb} module
but not in other database modules.  The \var{dict} object passed to
the constructor must support those methods.  This is generally
accomplished by calling one of \function{bsddb.hashopen},
\function{bsddb.btopen} or \function{bsddb.rnopen}.  The optional
\var{protocol} and \var{writeback} parameters have the
same interpretation as for the \class{Shelf} class.

\end{classdesc}

\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, protocol=None\optional{, writeback=False}}}}

A subclass of \class{Shelf} which accepts a \var{filename} instead of
a dict-like object.  The underlying file will be opened using
{}\function{anydbm.open}.  By default, the file will be created and
opened for both read and write.  The optional \var{flag} parameter has
the same interpretation as for the \function{open} function.  The
optional \var{protocol} and \var{writeback} parameters
have the same interpretation as for the \class{Shelf} class.
 
\end{classdesc}

\subsection{Example}

To summarize the interface (\code{key} is a string, \code{data} is an
arbitrary object):

\begin{verbatim}
import shelve

d = shelve.open(filename) # open -- file may get suffix added by low-level
                          # library

d[key] = data   # store data at key (overwrites old data if
                # using an existing key)
data = d[key]   # retrieve a COPY of data at key (raise KeyError if no
                # such key)
del d[key]      # delete data stored at key (raises KeyError
                # if no such key)
flag = d.has_key(key)   # true if the key exists
list = d.keys() # a list of all existing keys (slow!)

# as d was opened WITHOUT writeback=True, beware:
d['xx'] = range(4)  # this works as expected, but...
d['xx'].append(5)   # *this doesn't!* -- d['xx'] is STILL range(4)!!!
# having opened d without writeback=True, you need to code carefully:
temp = d['xx']      # extracts the copy
temp.append(5)      # mutates the copy
d['xx'] = temp      # stores the copy right back, to persist it
# or, d=shelve.open(filename,writeback=True) would let you just code
# d['xx'].append(5) and have it work as expected, BUT it would also
# consume more memory and make the d.close() operation slower.

d.close()       # close it
\end{verbatim}

\begin{seealso}
  \seemodule{anydbm}{Generic interface to \code{dbm}-style databases.}
  \seemodule{bsddb}{BSD \code{db} database interface.}
  \seemodule{dbhash}{Thin layer around the \module{bsddb} module which provides an
  \function{open} function like the other database modules.}
  \seemodule{dbm}{Standard \UNIX{} database interface.}
  \seemodule{dumbdbm}{Portable implementation of the \code{dbm} interface.}
  \seemodule{gdbm}{GNU database interface, based on the \code{dbm} interface.}
  \seemodule{pickle}{Object serialization used by \module{shelve}.}
  \seemodule{cPickle}{High-performance version of \refmodule{pickle}.}
\end{seealso}