diff options
Diffstat (limited to 'Lib/bsddb/dbtables.py')
-rw-r--r-- | Lib/bsddb/dbtables.py | 629 |
1 files changed, 629 insertions, 0 deletions
diff --git a/Lib/bsddb/dbtables.py b/Lib/bsddb/dbtables.py new file mode 100644 index 0000000..4e93451 --- /dev/null +++ b/Lib/bsddb/dbtables.py @@ -0,0 +1,629 @@ +#----------------------------------------------------------------------- +# +# Copyright (C) 2000, 2001 by Autonomous Zone Industries +# +# License: This is free software. You may use this software for any +# purpose including modification/redistribution, so long as +# this header remains intact and that you do not claim any +# rights of ownership or authorship of this software. This +# software has been tested, but no warranty is expressed or +# implied. +# +# -- Gregory P. Smith <greg@electricrain.com> + +# This provides a simple database table interface built on top of +# the Python BerkeleyDB 3 interface. +# +_cvsid = '$Id$' + +import string +import sys +try: + import cPickle + pickle = cPickle +except ImportError: + import pickle +import whrandom +import xdrlib +import re +import copy + +from bsddb3.db import * + + +class TableDBError(StandardError): pass +class TableAlreadyExists(TableDBError): pass + + +class Cond: + """This condition matches everything""" + def __call__(self, s): + return 1 + +class ExactCond(Cond): + """Acts as an exact match condition function""" + def __init__(self, strtomatch): + self.strtomatch = strtomatch + def __call__(self, s): + return s == self.strtomatch + +class PrefixCond(Cond): + """Acts as a condition function for matching a string prefix""" + def __init__(self, prefix): + self.prefix = prefix + def __call__(self, s): + return s[:len(self.prefix)] == self.prefix + +class LikeCond(Cond): + """ + Acts as a function that will match using an SQL 'LIKE' style + string. Case insensitive and % signs are wild cards. + This isn't perfect but it should work for the simple common cases. + """ + def __init__(self, likestr, re_flags=re.IGNORECASE): + # escape python re characters + chars_to_escape = '.*+()[]?' + for char in chars_to_escape : + likestr = string.replace(likestr, char, '\\'+char) + # convert %s to wildcards + self.likestr = string.replace(likestr, '%', '.*') + self.re = re.compile('^'+self.likestr+'$', re_flags) + def __call__(self, s): + return self.re.match(s) + +# +# keys used to store database metadata +# +_table_names_key = '__TABLE_NAMES__' # list of the tables in this db +_columns = '._COLUMNS__' # table_name+this key contains a list of columns +def _columns_key(table) : return table + _columns + +# +# these keys are found within table sub databases +# +_data = '._DATA_.' # this+column+this+rowid key contains table data +_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each + # row in the table. (no data is stored) +_rowid_str_len = 8 # length in bytes of the unique rowid strings +def _data_key(table, col, rowid) : return table + _data + col + _data + rowid +def _search_col_data_key(table, col) : return table + _data + col + _data +def _search_all_data_key(table) : return table + _data +def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid +def _search_rowid_key(table) : return table + _rowid + +def contains_metastrings(s) : + """Verify that the given string does not contain any + metadata strings that might interfere with dbtables database operation. + """ + if string.find(s, _table_names_key) >= 0 or \ + string.find(s, _columns) >= 0 or \ + string.find(s, _data) >= 0 or \ + string.find(s, _rowid) >= 0 : + return 1 + else : + return 0 + + +class bsdTableDB : + def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) : + """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600) + Open database name in the dbhome BerkeleyDB directory. + Use keyword arguments when calling this constructor. + """ + myflags = DB_THREAD + if create : + myflags = myflags | DB_CREATE + flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags + if recover : + flagsforenv = flagsforenv | DB_RECOVER + self.env = DBEnv() + self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance + self.env.open(dbhome, myflags | flagsforenv) + if truncate : + myflags = myflags | DB_TRUNCATE + self.db = DB(self.env) + self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata] + self.db.open(filename, DB_BTREE, myflags, mode) + + self.dbfilename = filename + + # Initialize the table names list if this is a new database + if not self.db.has_key(_table_names_key) : + self.db.put(_table_names_key, pickle.dumps([], 1)) + + # TODO verify more of the database's metadata? + + self.__tablecolumns = {} + + def __del__(self): + self.close() + + def close(self): + if self.db is not None: + self.db.close() + self.db = None + if self.env is not None: + self.env.close() + self.env = None + + def checkpoint(self, mins=0): + try: + self.env.txn_checkpoint(mins) + except DBIncompleteError: + pass + + def sync(self): + try: + self.db.sync() + except DBIncompleteError: + pass + + def _db_print(self) : + """Print the database to stdout for debugging""" + print "******** Printing raw database for debugging ********" + cur = self.db.cursor() + try: + key, data = cur.first() + while 1 : + print `{key: data}` + next = cur.next() + if next: + key, data = next + else: + cur.close() + return + except DBNotFoundError: + cur.close() + + + def CreateTable(self, table, columns) : + """CreateTable(table, columns) - Create a new table in the database + raises TableDBError if it already exists or for other DB errors. + """ + assert type(columns) == type([]) + txn = None + try: + # checking sanity of the table and column names here on + # table creation will prevent problems elsewhere. + if contains_metastrings(table) : + raise ValueError, "bad table name: contains reserved metastrings" + for column in columns : + if contains_metastrings(column) : + raise ValueError, "bad column name: contains reserved metastrings" + + columnlist_key = _columns_key(table) + if self.db.has_key(columnlist_key) : + raise TableAlreadyExists, "table already exists" + + txn = self.env.txn_begin() + # store the table's column info + self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn) + + # add the table name to the tablelist + tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW)) + tablelist.append(table) + self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP + self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) + + txn.commit() + txn = None + + except DBError, dberror: + if txn : + txn.abort() + raise TableDBError, dberror[1] + + + def ListTableColumns(self, table): + """Return a list of columns in the given table. [] if the table doesn't exist. + """ + assert type(table) == type('') + if contains_metastrings(table) : + raise ValueError, "bad table name: contains reserved metastrings" + + columnlist_key = _columns_key(table) + if not self.db.has_key(columnlist_key): + return [] + pickledcolumnlist = self.db.get(columnlist_key) + if pickledcolumnlist: + return pickle.loads(pickledcolumnlist) + else: + return [] + + def ListTables(self): + """Return a list of tables in this database.""" + pickledtablelist = self.db.get(_table_names_key) + if pickledtablelist: + return pickle.loads(pickledtablelist) + else: + return [] + + def CreateOrExtendTable(self, table, columns): + """CreateOrExtendTable(table, columns) - Create a new table in the database. + If a table of this name already exists, extend it to have any + additional columns present in the given list as well as + all of its current columns. + """ + assert type(columns) == type([]) + try: + self.CreateTable(table, columns) + except TableAlreadyExists: + # the table already existed, add any new columns + txn = None + try: + columnlist_key = _columns_key(table) + txn = self.env.txn_begin() + + # load the current column list + oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW)) + # create a hash table for fast lookups of column names in the loop below + oldcolumnhash = {} + for c in oldcolumnlist: + oldcolumnhash[c] = c + + # create a new column list containing both the old and new column names + newcolumnlist = copy.copy(oldcolumnlist) + for c in columns: + if not oldcolumnhash.has_key(c): + newcolumnlist.append(c) + + # store the table's new extended column list + if newcolumnlist != oldcolumnlist : + # delete the old one first since we opened with DB_DUP + self.db.delete(columnlist_key, txn) + self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn) + + txn.commit() + txn = None + + self.__load_column_info(table) + except DBError, dberror: + if txn: + txn.abort() + raise TableDBError, dberror[1] + + + def __load_column_info(self, table) : + """initialize the self.__tablecolumns dict""" + # check the column names + try: + tcolpickles = self.db.get(_columns_key(table)) + except DBNotFoundError: + raise TableDBError, "unknown table: " + `table` + if not tcolpickles: + raise TableDBError, "unknown table: " + `table` + self.__tablecolumns[table] = pickle.loads(tcolpickles) + + def __new_rowid(self, table, txn=None) : + """Create a new unique row identifier""" + unique = 0 + while not unique : + # Generate a random 64-bit row ID string + # (note: this code has <64 bits of randomness + # but it's plenty for our database id needs!) + p = xdrlib.Packer() + p.pack_int(int(whrandom.random()*2147483647)) + p.pack_int(int(whrandom.random()*2147483647)) + newid = p.get_buffer() + + # Guarantee uniqueness by adding this key to the database + try: + self.db.put(_rowid_key(table, newid), None, txn=txn, flags=DB_NOOVERWRITE) + except DBKeyExistsError: + pass + else: + unique = 1 + + return newid + + + def Insert(self, table, rowdict) : + """Insert(table, datadict) - Insert a new row into the table + using the keys+values from rowdict as the column values. + """ + txn = None + try: + if not self.db.has_key(_columns_key(table)) : + raise TableDBError, "unknown table" + + # check the validity of each column name + if not self.__tablecolumns.has_key(table) : + self.__load_column_info(table) + for column in rowdict.keys() : + if not self.__tablecolumns[table].count(column) : + raise TableDBError, "unknown column: "+`column` + + # get a unique row identifier for this row + rowid = self.__new_rowid(table) + + txn = self.env.txn_begin() + + # insert the row values into the table database + for column, dataitem in rowdict.items() : + # store the value + self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) + + txn.commit() + txn = None + + except DBError, dberror: + if txn : + txn.abort() + self.db.delete(_rowid_key(table, rowid)) + raise TableDBError, dberror[1] + + + def Modify(self, table, conditions={}, mappings={}) : + """Modify(table, conditions) - Modify in rows matching 'conditions' + using mapping functions in 'mappings' + * conditions is a dictionary keyed on column names + containing condition functions expecting the data string as an + argument and returning a boolean. + * mappings is a dictionary keyed on column names containint condition + functions expecting the data string as an argument and returning the + new string for that column. + """ + try: + matching_rowids = self.__Select(table, [], conditions) + + # modify only requested columns + columns = mappings.keys() + for rowid in matching_rowids.keys() : + txn = None + try: + for column in columns : + txn = self.env.txn_begin() + # modify the requested column + try: + dataitem = self.db.get(_data_key(table, column, rowid), txn) + self.db.delete(_data_key(table, column, rowid), txn) + except DBNotFoundError: + dataitem = None # XXXXXXX row key somehow didn't exist, assume no error + dataitem = mappings[column](dataitem) + if dataitem <> None: + self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) + txn.commit() + txn = None + + except DBError, dberror: + if txn : + txn.abort() + raise + + except DBError, dberror: + raise TableDBError, dberror[1] + + def Delete(self, table, conditions={}) : + """Delete(table, conditions) - Delete items matching the given + conditions from the table. + * conditions is a dictionary keyed on column names + containing condition functions expecting the data string as an + argument and returning a boolean. + """ + try: + matching_rowids = self.__Select(table, [], conditions) + + # delete row data from all columns + columns = self.__tablecolumns[table] + for rowid in matching_rowids.keys() : + txn = None + try: + txn = self.env.txn_begin() + for column in columns : + # delete the data key + try: + self.db.delete(_data_key(table, column, rowid), txn) + except DBNotFoundError: + pass # XXXXXXX column may not exist, assume no error + + try: + self.db.delete(_rowid_key(table, rowid), txn) + except DBNotFoundError: + pass # XXXXXXX row key somehow didn't exist, assume no error + txn.commit() + txn = None + except DBError, dberror: + if txn : + txn.abort() + raise + + except DBError, dberror: + raise TableDBError, dberror[1] + + + def Select(self, table, columns, conditions={}) : + """Select(table, conditions) - retrieve specific row data + Returns a list of row column->value mapping dictionaries. + * columns is a list of which column data to return. If + columns is None, all columns will be returned. + * conditions is a dictionary keyed on column names + containing callable conditions expecting the data string as an + argument and returning a boolean. + """ + try: + if not self.__tablecolumns.has_key(table) : + self.__load_column_info(table) + if columns is None : + columns = self.__tablecolumns[table] + matching_rowids = self.__Select(table, columns, conditions) + except DBError, dberror: + raise TableDBError, dberror[1] + + # return the matches as a list of dictionaries + return matching_rowids.values() + + + def __Select(self, table, columns, conditions) : + """__Select() - Used to implement Select and Delete (above) + Returns a dictionary keyed on rowids containing dicts + holding the row data for columns listed in the columns param + that match the given conditions. + * conditions is a dictionary keyed on column names + containing callable conditions expecting the data string as an + argument and returning a boolean. + """ + # check the validity of each column name + if not self.__tablecolumns.has_key(table) : + self.__load_column_info(table) + if columns is None : + columns = self.tablecolumns[table] + for column in (columns + conditions.keys()) : + if not self.__tablecolumns[table].count(column) : + raise TableDBError, "unknown column: "+`column` + + # keyed on rows that match so far, containings dicts keyed on + # column names containing the data for that row and column. + matching_rowids = {} + + rejected_rowids = {} # keys are rowids that do not match + + # attempt to sort the conditions in such a way as to minimize full column lookups + def cmp_conditions(atuple, btuple): + a = atuple[1] + b = btuple[1] + if type(a) == type(b) : + if isinstance(a, PrefixCond) and isinstance(b, PrefixCond): + return cmp(len(b.prefix), len(a.prefix)) # longest prefix first + if isinstance(a, LikeCond) and isinstance(b, LikeCond): + return cmp(len(b.likestr), len(a.likestr)) # longest likestr first + return 0 + if isinstance(a, ExactCond): + return -1 + if isinstance(b, ExactCond): + return 1 + if isinstance(a, PrefixCond): + return -1 + if isinstance(b, PrefixCond): + return 1 + # leave all unknown condition callables alone as equals + return 0 + + conditionlist = conditions.items() + conditionlist.sort(cmp_conditions) + + # Apply conditions to column data to find what we want + cur = self.db.cursor() + column_num = -1 + for column, condition in conditionlist : + column_num = column_num + 1 + searchkey = _search_col_data_key(table, column) + # speedup: don't linear search columns within loop + if column in columns : + savethiscolumndata = 1 # save the data for return + else : + savethiscolumndata = 0 # data only used for selection + + try: + key, data = cur.set_range(searchkey) + while key[:len(searchkey)] == searchkey : + # extract the rowid from the key + rowid = key[-_rowid_str_len:] + + if not rejected_rowids.has_key(rowid) : + # if no condition was specified or the condition + # succeeds, add row to our match list. + if not condition or condition(data) : + # only create new entries in matcing_rowids on + # the first pass, otherwise reject the + # rowid as it must not have matched + # the previous passes + if column_num == 0 : + if not matching_rowids.has_key(rowid) : + matching_rowids[rowid] = {} + if savethiscolumndata : + matching_rowids[rowid][column] = data + else : + rejected_rowids[rowid] = rowid + else : + if matching_rowids.has_key(rowid) : + del matching_rowids[rowid] + rejected_rowids[rowid] = rowid + + key, data = cur.next() + + except DBError, dberror: + if dberror[0] != DB_NOTFOUND : + raise + continue + + cur.close() + + # we're done selecting rows, garbage collect the reject list + del rejected_rowids + + # extract any remaining desired column data from the + # database for the matching rows. + if len(columns) > 0 : + for rowid, rowdata in matching_rowids.items() : + for column in columns : + if rowdata.has_key(column) : + continue + try: + rowdata[column] = self.db.get(_data_key(table, column, rowid)) + except DBError, dberror: + if dberror[0] != DB_NOTFOUND : + raise + rowdata[column] = None + + # return the matches + return matching_rowids + + + def Drop(self, table) : + """Remove an entire table from the database + """ + txn = None + try: + txn = self.env.txn_begin() + + # delete the column list + self.db.delete(_columns_key(table), txn) + + cur = self.db.cursor(txn) + + # delete all keys containing this tables column and row info + table_key = _search_all_data_key(table) + while 1 : + try: + key, data = cur.set_range(table_key) + except DBNotFoundError: + break + # only delete items in this table + if key[:len(table_key)] != table_key : + break + cur.delete() + + # delete all rowids used by this table + table_key = _search_rowid_key(table) + while 1 : + try: + key, data = cur.set_range(table_key) + except DBNotFoundError: + break + # only delete items in this table + if key[:len(table_key)] != table_key : + break + cur.delete() + + cur.close() + + # delete the tablename from the table name list + tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW)) + try: + tablelist.remove(table) + except ValueError: + pass # hmm, it wasn't there, oh well, that's what we want. + self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP + self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) + + txn.commit() + txn = None + + if self.__tablecolumns.has_key(table) : + del self.__tablecolumns[table] + + except DBError, dberror: + if txn : + txn.abort() + raise TableDBError, dberror[1] + |