diff options
author | Brett Cannon <bcannon@gmail.com> | 2008-11-21 00:17:53 (GMT) |
---|---|---|
committer | Brett Cannon <bcannon@gmail.com> | 2008-11-21 00:17:53 (GMT) |
commit | 58425d31036e07bb415e66d37d8bd59516595a9e (patch) | |
tree | fb0e7f0fa3d7cc2a31a2887b2bf279433135342a /Lib/dbm | |
parent | 6e0d68e9e25fd6ea692a74a57447e98b87a606ad (diff) | |
download | cpython-58425d31036e07bb415e66d37d8bd59516595a9e.zip cpython-58425d31036e07bb415e66d37d8bd59516595a9e.tar.gz cpython-58425d31036e07bb415e66d37d8bd59516595a9e.tar.bz2 |
Make dbm.dumb encode strings as UTF-8. Also fix it so it accepts bytes and
strings.
Closes issue #3799.
Diffstat (limited to 'Lib/dbm')
-rw-r--r-- | Lib/dbm/dumb.py | 28 |
1 files changed, 17 insertions, 11 deletions
```diff
diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py
index 76f4a63..f37f141 100644
--- a/Lib/dbm/dumb.py
+++ b/Lib/dbm/dumb.py
@@ -84,6 +84,7 @@ class _Database(collections.MutableMapping):
             for line in f:
                 line = line.rstrip()
                 key, pos_and_siz_pair = eval(line)
+                key = key.encode('Latin-1')
                 self._index[key] = pos_and_siz_pair
             f.close()
 
@@ -110,13 +111,16 @@ class _Database(collections.MutableMapping):
         f = self._io.open(self._dirfile, 'w')
         self._chmod(self._dirfile)
         for key, pos_and_siz_pair in self._index.items():
-            f.write("%r, %r\n" % (key, pos_and_siz_pair))
+            # Use Latin-1 since it has no qualms with any value in any
+            # position; UTF-8, though, does care sometimes.
+            f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
         f.close()
 
     sync = _commit
 
     def __getitem__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         pos, siz = self._index[key] # may raise KeyError
         f = _io.open(self._datfile, 'rb')
         f.seek(pos)
@@ -161,11 +165,12 @@ class _Database(collections.MutableMapping):
         f.close()
 
     def __setitem__(self, key, val):
-        if not isinstance(key, bytes):
-            raise TypeError("keys must be bytes")
-        key = key.decode("latin-1") # hashable bytes
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        elif not isinstance(key, (bytes, bytearray)):
+            raise TypeError("keys must be bytes or strings")
         if not isinstance(val, (bytes, bytearray)):
-            raise TypeError("values must be byte strings")
+            raise TypeError("values must be bytes")
         if key not in self._index:
             self._addkey(key, self._addval(val))
         else:
@@ -191,7 +196,8 @@ class _Database(collections.MutableMapping):
         # (so that _commit() never gets called).
 
     def __delitem__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         # The blocks used by the associated value are lost.
         del self._index[key]
         # XXX It's unclear why we do a _commit() here (the code always
@@ -201,14 +207,14 @@ class _Database(collections.MutableMapping):
         self._commit()
 
     def keys(self):
-        return [key.encode("latin-1") for key in self._index.keys()]
+        return list(self._index.keys())
 
     def items(self):
-        return [(key.encode("latin-1"), self[key.encode("latin-1")])
-                for key in self._index.keys()]
+        return [(key, self[key]) for key in self._index.keys()]
 
     def __contains__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         return key in self._index
 
     def iterkeys(self):
```