summary | refs | log | tree | commit | diff | stats
path: root/Lib/dbm
diff options
context:
space:
mode:
author Brett Cannon <bcannon@gmail.com> 2008-11-21 00:17:53 (GMT)
committer Brett Cannon <bcannon@gmail.com> 2008-11-21 00:17:53 (GMT)
commit 58425d31036e07bb415e66d37d8bd59516595a9e (patch)
tree fb0e7f0fa3d7cc2a31a2887b2bf279433135342a /Lib/dbm
parent 6e0d68e9e25fd6ea692a74a57447e98b87a606ad (diff)
download cpython-58425d31036e07bb415e66d37d8bd59516595a9e.zip
cpython-58425d31036e07bb415e66d37d8bd59516595a9e.tar.gz
cpython-58425d31036e07bb415e66d37d8bd59516595a9e.tar.bz2
Make dbm.dumb encode strings as UTF-8. Also fix it so it accepts bytes and
strings. Closes issue #3799.
Diffstat (limited to 'Lib/dbm')
-rw-r--r-- Lib/dbm/dumb.py 28
1 files changed, 17 insertions, 11 deletions
diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py
index 76f4a63..f37f141 100644
--- a/Lib/dbm/dumb.py
+++ b/Lib/dbm/dumb.py
@@ -84,6 +84,7 @@ class _Database(collections.MutableMapping):
for line in f:
line = line.rstrip()
key, pos_and_siz_pair = eval(line)
+ key = key.encode('Latin-1')
self._index[key] = pos_and_siz_pair
f.close()
@@ -110,13 +111,16 @@ class _Database(collections.MutableMapping):
f = self._io.open(self._dirfile, 'w')
self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.items():
- f.write("%r, %r\n" % (key, pos_and_siz_pair))
+ # Use Latin-1 since it has no qualms with any value in any
+ # position; UTF-8, though, does care sometimes.
+ f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
f.close()
sync = _commit
def __getitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
pos, siz = self._index[key] # may raise KeyError
f = _io.open(self._datfile, 'rb')
f.seek(pos)
@@ -161,11 +165,12 @@ class _Database(collections.MutableMapping):
f.close()
def __setitem__(self, key, val):
- if not isinstance(key, bytes):
- raise TypeError("keys must be bytes")
- key = key.decode("latin-1") # hashable bytes
+ if isinstance(key, str):
+ key = key.encode('utf-8')
+ elif not isinstance(key, (bytes, bytearray)):
+ raise TypeError("keys must be bytes or strings")
if not isinstance(val, (bytes, bytearray)):
- raise TypeError("values must be byte strings")
+ raise TypeError("values must be bytes")
if key not in self._index:
self._addkey(key, self._addval(val))
else:
@@ -191,7 +196,8 @@ class _Database(collections.MutableMapping):
# (so that _commit() never gets called).
def __delitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
# The blocks used by the associated value are lost.
del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always
@@ -201,14 +207,14 @@ class _Database(collections.MutableMapping):
self._commit()
def keys(self):
- return [key.encode("latin-1") for key in self._index.keys()]
+ return list(self._index.keys())
def items(self):
- return [(key.encode("latin-1"), self[key.encode("latin-1")])
- for key in self._index.keys()]
+ return [(key, self[key]) for key in self._index.keys()]
def __contains__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
return key in self._index
def iterkeys(self):