From 153c9e493e9850340fd686ab7a6e5c176953abd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sat, 19 Apr 2003 20:59:03 +0000 Subject: Patch #553171: Add writeback parameter. Also add protocol parameter. --- Doc/lib/libshelve.tex | 90 +++++++++++++++++++++++++++++++------------- Lib/shelve.py | 99 +++++++++++++++++++++++++++++++++++++++---------- Lib/test/test_shelve.py | 51 ++++++++++++++++++++++--- Misc/NEWS | 3 ++ 4 files changed, 193 insertions(+), 50 deletions(-) diff --git a/Doc/lib/libshelve.tex b/Doc/lib/libshelve.tex index 996c79b..17ef3e5 100644 --- a/Doc/lib/libshelve.tex +++ b/Doc/lib/libshelve.tex @@ -13,15 +13,30 @@ instances, recursive data types, and objects containing lots of shared sub-objects. The keys are ordinary strings. \refstmodindex{pickle} -\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,binary=\code{False}}}} +\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,protocol=\code{None}\optional{,writeback=\code{False}\optional{,binary=\code{None}}}}}} Open a persistent dictionary. The filename specified is the base filename for the underlying database. As a side-effect, an extension may be added to the filename and more than one file may be created. By default, the underlying database file is opened for reading and writing. The optional {}\var{flag} pararameter has the same interpretation as the \var{flag} -parameter of \function{anydbm.open}. By default, ASCII pickles are used to -serialize values. If the optional \var{binary} parameter is set to -{}\var{True}, binary pickles will be used instead. +parameter of \function{anydbm.open}. + +By default, version 0 pickles are used to serialize values. +The version of the pickle protocol can be specified with the +\var{protocol} parameter. \versionchanged[The \var{protocol} +parameter was added. 
The \var{binary} parameter is deprecated +and provided for backwards compatibility only]{2.3} + +By default, mutations to persistent-dictionary mutable entries are not +automatically written back. If the optional \var{writeback} parameter +is set to {}\var{True}, all entries accessed are cached in memory, and +written back at close time; this can make it handier to mutate mutable +entries in the persistent dictionary, but, if many entries are +accessed, it can consume vast amounts of memory for the cache, and it +can make the close operation very slow since all accessed entries are +written back (there is no way to determine which accessed entries are +mutable, nor which ones were actually mutated). + \end{funcdesc} Shelve objects support all methods supported by dictionaries. This eases @@ -61,33 +76,47 @@ requires knowledge about the database implementation used. \end{itemize} -\begin{classdesc}{Shelf}{dict\optional{, binary=False}} +\begin{classdesc}{Shelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}} A subclass of \class{UserDict.DictMixin} which stores pickled values in the -\var{dict} object. If the \var{binary} parameter is \code{True}, binary -pickles will be used. This can provide much more compact storage than plain -text pickles, depending on the nature of the objects stored in the database. +\var{dict} object. + +By default, version 0 pickles are used to serialize values. The +version of the pickle protocol can be specified with the +\var{protocol} parameter. See the \module{pickle} documentation for a +discussion of the pickle protocols. \versionchanged[The \var{protocol} +parameter was added. The \var{binary} parameter is deprecated and +provided for backwards compatibility only]{2.3} + +If the \var{writeback} parameter is \code{True}, the object will hold a +cache of all entries accessed and write them back to the \var{dict} at +sync and close times. 
This allows natural operations on mutable entries, +but can consume much more memory and make sync and close take a long time. \end{classdesc} -\begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False}} -A subclass of \class{Shelf} which exposes \method{first}, \method{next}, -\method{previous}, \method{last} and \method{set_location} which are -available in the \module{bsddb} module but not in other database modules. -The \var{dict} object passed to the constructor must support those methods. -This is generally accomplished by calling one of \function{bsddb.hashopen}, +\begin{classdesc}{BsdDbShelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}} + +A subclass of \class{Shelf} which exposes \method{first}, +\method{next}, \method{previous}, \method{last} and +\method{set_location} which are available in the \module{bsddb} module +but not in other database modules. The \var{dict} object passed to +the constructor must support those methods. This is generally +accomplished by calling one of \function{bsddb.hashopen}, \function{bsddb.btopen} or \function{bsddb.rnopen}. The optional -\var{binary} parameter has the same interpretation as for the \class{Shelf} -class. +\var{protocol}, \var{writeback}, and \var{binary} parameters have the +same interpretation as for the \class{Shelf} class. + \end{classdesc} -\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, binary=False}}} +\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}} -A subclass of \class{Shelf} which accepts a \var{filename} instead of a -dict-like object. The underlying file will be opened using -{}\function{anydbm.open}. By default, the file will be created and opened -for both read and write. The optional \var{flag} parameter has the same -interpretation as for the \function{open} function. 
The optional -\var{binary} parameter has the same interpretation as for the -{}\class{Shelf} class. +A subclass of \class{Shelf} which accepts a \var{filename} instead of +a dict-like object. The underlying file will be opened using +{}\function{anydbm.open}. By default, the file will be created and +opened for both read and write. The optional \var{flag} parameter has +the same interpretation as for the \function{open} function. The +optional \var{protocol}, \var{writeback}, and \var{binary} parameters +have the same interpretation as for the \class{Shelf} class. + \end{classdesc} \subsection{Example} @@ -103,13 +132,24 @@ d = shelve.open(filename) # open -- file may get suffix added by low-level d[key] = data # store data at key (overwrites old data if # using an existing key) -data = d[key] # retrieve data at key (raise KeyError if no +data = d[key] # retrieve a COPY of data at key (raise KeyError if no # such key) del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists list = d.keys() # a list of all existing keys (slow!) +# as d was opened WITHOUT writeback=True, beware: +d['xx'] = range(4) # this works as expected, but... +d['xx'].append(5) # *this doesn't!* -- d['xx'] is STILL range(4)!!! +# having opened d without writeback=True, you need to code carefully: +temp = d['xx'] # extracts the copy +temp.append(5) # mutates the copy +d['xx'] = temp # stores the copy right back, to persist it +# or, d=shelve.open(filename,writeback=True) would let you just code +# d['xx'].append(5) and have it work as expected, BUT it would also +# consume more memory and make the d.close() operation slower. 
+ d.close() # close it \end{verbatim} diff --git a/Lib/shelve.py b/Lib/shelve.py index 982c4e2..43033ba 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -15,8 +15,9 @@ object): d[key] = data # store data at key (overwrites old data if # using an existing key) - data = d[key] # retrieve data at key (raise KeyError if no - # such key) + data = d[key] # retrieve a COPY of the data at key (raise + # KeyError if no such key) -- NOTE that this + # access returns a *copy* of the entry! del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists; same as "key in d" @@ -26,6 +27,33 @@ object): Dependent on the implementation, closing a persistent dictionary may or may not be necessary to flush changes to disk. + +Normally, d[key] returns a COPY of the entry. This needs care when +mutable entries are mutated: for example, if d[key] is a list, + d[key].append(anitem) +does NOT modify the entry d[key] itself, as stored in the persistent +mapping -- it only modifies the copy, which is then immediately +discarded, so that the append has NO effect whatsoever. To append an +item to d[key] in a way that will affect the persistent mapping, use: + data = d[key] + data.append(anitem) + d[key] = data + +To avoid the problem with mutable entries, you may pass the keyword +argument writeback=True in the call to shelve.open. When you use: + d = shelve.open(filename, writeback=True) +then d keeps a cache of all entries you access, and writes them all back +to the persistent mapping when you call d.close(). This ensures that +such usage as d[key].append(anitem) works as intended. 
+ +However, using keyword argument writeback=True may consume vast amounts +of memory for the cache, and it may make d.close() very slow, if you +access many of d's entries after opening it in this way: d has no way to +check which of the entries you access are mutable and/or which ones you +actually mutate, so it must cache, and write back at close, all of the +entries that you access. You can call d.sync() to write back all the +entries in the cache, and empty the cache (d.sync() also synchronizes +the persistent dictionary on disk, if feasible). """ # Try using cPickle and cStringIO if available. @@ -41,6 +69,7 @@ except ImportError: from StringIO import StringIO import UserDict +import warnings __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"] @@ -51,9 +80,19 @@ class Shelf(UserDict.DictMixin): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, dict, binary=False): + def __init__(self, dict, protocol=None, writeback=False, binary=None): self.dict = dict - self._binary = binary + if protocol is not None and binary is not None: + raise ValueError, "can't specify both 'protocol' and 'binary'" + if binary is not None: + warnings.warn("The 'binary' argument to Shelf() is deprecated", + PendingDeprecationWarning) + protocol = int(binary) + if protocol is None: + protocol = 0 + self._protocol = protocol + self.writeback = writeback + self.cache = {} def keys(self): return self.dict.keys() @@ -73,19 +112,32 @@ class Shelf(UserDict.DictMixin): return default def __getitem__(self, key): - f = StringIO(self.dict[key]) - return Unpickler(f).load() + try: + value = self.cache[key] + except KeyError: + f = StringIO(self.dict[key]) + value = Unpickler(f).load() + if self.writeback: + self.cache[key] = value + return value def __setitem__(self, key, value): + if self.writeback: + self.cache[key] = value f = StringIO() - p = Pickler(f, self._binary) + p = Pickler(f, self._protocol) p.dump(value) self.dict[key] = f.getvalue() def 
__delitem__(self, key): del self.dict[key] + try: + del self.cache[key] + except KeyError: + pass def close(self): + self.sync() try: self.dict.close() except: @@ -96,6 +148,12 @@ class Shelf(UserDict.DictMixin): self.close() def sync(self): + if self.writeback and self.cache: + self.writeback = False + for key, entry in self.cache.iteritems(): + self[key] = entry + self.writeback = True + self.cache = {} if hasattr(self.dict, 'sync'): self.dict.sync() @@ -113,8 +171,8 @@ class BsdDbShelf(Shelf): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, dict, binary=False): - Shelf.__init__(self, dict, binary) + def __init__(self, dict, protocol=None, writeback=False, binary=None): + Shelf.__init__(self, dict, protocol, writeback, binary) def set_location(self, key): (key, value) = self.dict.set_location(key) @@ -149,22 +207,25 @@ class DbfilenameShelf(Shelf): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, filename, flag='c', binary=False): + def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None): import anydbm - Shelf.__init__(self, anydbm.open(filename, flag), binary) + Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback, binary) -def open(filename, flag='c', binary=False): +def open(filename, flag='c', protocol=None, writeback=False, binary=None): """Open a persistent dictionary for reading and writing. - The filename parameter is the base filename for the underlying database. - As a side-effect, an extension may be added to the filename and more - than one file may be created. The optional flag parameter has the - same interpretation as the flag parameter of anydbm.open(). The - optional binary parameter may be set to True to force the use of binary - pickles for serializing data values. + The filename parameter is the base filename for the underlying + database. 
As a side-effect, an extension may be added to the + filename and more than one file may be created. The optional flag + parameter has the same interpretation as the flag parameter of + anydbm.open(). The optional protocol parameter specifies the + version of the pickle protocol (0, 1, or 2). + + The optional binary parameter is deprecated and may be set to True + to force the use of binary pickles for serializing data values. See the module's __doc__ string for an overview of the interface. """ - return DbfilenameShelf(filename, flag, binary) + return DbfilenameShelf(filename, flag, protocol, writeback, binary) diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index e7c4b50..a0274d5 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -28,6 +28,16 @@ class TestCase(unittest.TestCase): for f in glob.glob(self.fn+"*"): os.unlink(f) + def test_proto2_file_shelf(self): + try: + s = shelve.open(self.fn, protocol=2) + s['key1'] = (1,2,3,4) + self.assertEqual(s['key1'], (1,2,3,4)) + s.close() + finally: + for f in glob.glob(self.fn+"*"): + os.unlink(f) + def test_in_memory_shelf(self): d1 = {} s = shelve.Shelf(d1, binary=False) @@ -43,6 +53,27 @@ class TestCase(unittest.TestCase): self.assertEqual(len(d1), 1) self.assertNotEqual(d1, d2) + def test_mutable_entry(self): + d1 = {} + s = shelve.Shelf(d1, protocol=2, writeback=False) + s['key1'] = [1,2,3,4] + self.assertEqual(s['key1'], [1,2,3,4]) + s['key1'].append(5) + self.assertEqual(s['key1'], [1,2,3,4]) + s.close() + + d2 = {} + s = shelve.Shelf(d2, protocol=2, writeback=True) + s['key1'] = [1,2,3,4] + self.assertEqual(s['key1'], [1,2,3,4]) + s['key1'].append(5) + self.assertEqual(s['key1'], [1,2,3,4,5]) + s.close() + + self.assertEqual(len(d1), 1) + self.assertEqual(len(d2), 1) + + from test_userdict import TestMappingProtocol class TestShelveBase(TestMappingProtocol): @@ -56,10 +87,10 @@ class TestShelveBase(TestMappingProtocol): return {"key1":"value1", "key2":2, "key3":(1,2,3)} def 
_empty_mapping(self): if self._in_mem: - x= shelve.Shelf({}, binary = self._binary) + x= shelve.Shelf({}, **self._args) else: self.counter+=1 - x= shelve.open(self.fn+str(self.counter), binary=self._binary) + x= shelve.open(self.fn+str(self.counter), **self._args) self._db.append(x) return x def tearDown(self): @@ -71,24 +102,32 @@ class TestShelveBase(TestMappingProtocol): os.unlink(f) class TestAsciiFileShelve(TestShelveBase): - _binary = False + _args={'binary':False} _in_mem = False class TestBinaryFileShelve(TestShelveBase): - _binary = True + _args={'binary':True} + _in_mem = False +class TestProto2FileShelve(TestShelveBase): + _args={'protocol':2} _in_mem = False class TestAsciiMemShelve(TestShelveBase): - _binary = False + _args={'binary':False} _in_mem = True class TestBinaryMemShelve(TestShelveBase): - _binary = True + _args={'binary':True} + _in_mem = True +class TestProto2MemShelve(TestShelveBase): + _args={'protocol':2} _in_mem = True def test_main(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(TestAsciiFileShelve)) suite.addTest(unittest.makeSuite(TestBinaryFileShelve)) + suite.addTest(unittest.makeSuite(TestProto2FileShelve)) suite.addTest(unittest.makeSuite(TestAsciiMemShelve)) suite.addTest(unittest.makeSuite(TestBinaryMemShelve)) + suite.addTest(unittest.makeSuite(TestProto2MemShelve)) suite.addTest(unittest.makeSuite(TestCase)) test_support.run_suite(suite) diff --git a/Misc/NEWS b/Misc/NEWS index a637100..05f9ac6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -123,6 +123,9 @@ Extension modules Library ------- +- shelve now supports the optional writeback argument, and exposes + pickle protocol versions. + - Several methods of nntplib.NNTP have grown an optional file argument which specifies a file where to divert the command's output (already supported by the body() method). (SF patch #720468) -- cgit v0.12