From 3bf99e3e876cb367cff34c5b9d659361b5ca9525 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Sun, 8 Dec 2002 18:36:24 +0000 Subject: Add support for binary pickles to the shelve module. In some situations this can result in significantly smaller files. All classes as well as the open function now accept an optional binary parameter, which defaults to False for backward compatibility. Added a small test suite, updated the libref documentation (including documenting the exported classes and fixing a few other nits) and added a note about the change to Misc/NEWS. --- Doc/lib/libshelve.tex | 38 ++++++++++++++++++++++++++++++++---- Lib/shelve.py | 17 +++++++++-------- Lib/test/test_shelve.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 4 ++++ 4 files changed, 98 insertions(+), 12 deletions(-) create mode 100644 Lib/test/test_shelve.py diff --git a/Doc/lib/libshelve.tex b/Doc/lib/libshelve.tex index 1e02c7b..e8491be 100644 --- a/Doc/lib/libshelve.tex +++ b/Doc/lib/libshelve.tex @@ -19,7 +19,8 @@ arbitrary object): \begin{verbatim} import shelve -d = shelve.open(filename) # open, with (g)dbm filename -- no suffix +d = shelve.open(filename) # open -- file may get suffix added by low-level + # library d[key] = data # store data at key (overwrites old data if # using an existing key) @@ -54,8 +55,10 @@ cause the database to refuse updates. \refbimodindex{gdbm} \item -Dependent on the implementation, closing a persistent dictionary may -or may not be necessary to flush changes to disk. +Depending on the implementation, closing a persistent dictionary may +or may not be necessary to flush changes to disk. The \method{__del__} +method of the \class{Shelf} class calls the \method{close} method, so the +programmer generally need not do this explicitly. \item The \module{shelve} module does not support \emph{concurrent} read/write @@ -67,10 +70,37 @@ requires knowledge about the database implementation used. 
\end{itemize} +\begin{classdesc}{Shelf}{dict\optional{, binary=False}} +A subclass of \class{UserDict.DictMixin} which stores pickled values in the +\var{dict} object. If the \var{binary} parameter is \constant{True}, binary +pickles will be used. This can provide much more compact storage than plain +text pickles, depending on the nature of the objects stored in the database. +\end{classdesc} + +\begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False}} +A subclass of \class{Shelf} which exposes \method{first}, \method{next}, +{}\method{previous}, \method{last} and \method{set_location} which are +available in the \module{bsddb} module but not in other database modules. +The \var{dict} object passed to the constructor must support those methods. +This is generally accomplished by calling one of \function{bsddb.hashopen}, +\function{bsddb.btopen} or \function{bsddb.rnopen}. The optional +\var{binary} parameter has the same interpretation as for the \class{Shelf} +class. +\end{classdesc} + +\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'}\optional{, binary=False}} +A subclass of \class{Shelf} which accepts a filename instead of a dict-like +object. The underlying file will be opened using \function{anydbm.open}. +By default, the file will be created and opened for both read and write. +The optional \var{binary} parameter has the same interpretation as for the +\class{Shelf} class. 
+\end{classdesc} \begin{seealso} \seemodule{anydbm}{Generic interface to \code{dbm}-style databases.} - \seemodule{dbhash}{BSD \code{db} database interface.} + \seemodule{bsddb}{BSD \code{db} database interface.} + \seemodule{dbhash}{Thin layer around the \module{bsddb} module which provides an + \function{open} function like the other database modules.} \seemodule{dbm}{Standard \UNIX{} database interface.} \seemodule{dumbdbm}{Portable implementation of the \code{dbm} interface.} \seemodule{gdbm}{GNU database interface, based on the \code{dbm} interface.} diff --git a/Lib/shelve.py b/Lib/shelve.py index 7a318a6..e262d79 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -51,8 +51,9 @@ class Shelf(UserDict.DictMixin): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, dict): + def __init__(self, dict, binary=False): self.dict = dict + self.binary = binary def keys(self): return self.dict.keys() @@ -77,7 +78,7 @@ class Shelf(UserDict.DictMixin): def __setitem__(self, key, value): f = StringIO() - p = Pickler(f) + p = Pickler(f, self.binary) p.dump(value) self.dict[key] = f.getvalue() @@ -112,8 +113,8 @@ class BsdDbShelf(Shelf): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, dict): - Shelf.__init__(self, dict) + def __init__(self, dict, binary=False): + Shelf.__init__(self, dict, binary) def set_location(self, key): (key, value) = self.dict.set_location(key) @@ -148,16 +149,16 @@ class DbfilenameShelf(Shelf): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, filename, flag='c'): + def __init__(self, filename, flag='c', binary=False): import anydbm - Shelf.__init__(self, anydbm.open(filename, flag)) + Shelf.__init__(self, anydbm.open(filename, flag), binary) -def open(filename, flag='c'): +def open(filename, flag='c', binary=False): """Open a persistent dictionary for reading and writing. Argument is the filename for the dbm database. 
See the module's __doc__ string for an overview of the interface. """ - return DbfilenameShelf(filename, flag) + return DbfilenameShelf(filename, flag, binary) diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py new file mode 100644 index 0000000..29af82e --- /dev/null +++ b/Lib/test/test_shelve.py @@ -0,0 +1,51 @@ +import os +import unittest +import shelve +import glob +from test import test_support + +class TestCase(unittest.TestCase): + + fn = "shelftemp.db" + + def test_ascii_file_shelf(self): + try: + s = shelve.open(self.fn, binary=False) + s['key1'] = (1,2,3,4) + self.assertEqual(s['key1'], (1,2,3,4)) + s.close() + finally: + for f in glob.glob(self.fn+"*"): + os.unlink(f) + + def test_binary_file_shelf(self): + try: + s = shelve.open(self.fn, binary=True) + s['key1'] = (1,2,3,4) + self.assertEqual(s['key1'], (1,2,3,4)) + s.close() + finally: + for f in glob.glob(self.fn+"*"): + os.unlink(f) + + def test_in_memory_shelf(self): + d1 = {} + s = shelve.Shelf(d1, binary=False) + s['key1'] = (1,2,3,4) + self.assertEqual(s['key1'], (1,2,3,4)) + s.close() + d2 = {} + s = shelve.Shelf(d2, binary=True) + s['key1'] = (1,2,3,4) + self.assertEqual(s['key1'], (1,2,3,4)) + s.close() + + self.assertEqual(len(d1), 1) + self.assertNotEqual(d1, d2) + +def test_main(): + test_support.run_unittest(TestCase) + + +if __name__ == "__main__": + test_main() diff --git a/Misc/NEWS b/Misc/NEWS index 0e883bb..68ff040 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -440,6 +440,10 @@ Library all dictionary methods. This eases the transition to persistent storage for scripts originally written with dictionaries in mind. +- shelve.open and the various classes in shelve.py now accept an optional + binary flag, which defaults to False. If True, the values stored in the + shelf are binary pickles. + - A new package, logging, implements the logging API defined by PEP 282. The code is written by Vinay Sajip. -- cgit v0.12