#----------------------------------------------------------------------- # # Copyright (C) 2000, 2001 by Autonomous Zone Industries # # License: This is free software. You may use this software for any # purpose including modification/redistribution, so long as # this header remains intact and that you do not claim any # rights of ownership or authorship of this software. This # software has been tested, but no warranty is expressed or # implied. # # -- Gregory P. Smith # This provides a simple database table interface built on top of # the Python BerkeleyDB 3 interface. # _cvsid = '$Id$' import string import sys try: import cPickle pickle = cPickle except ImportError: import pickle import whrandom import xdrlib import re import copy from bsddb.db import * class TableDBError(StandardError): pass class TableAlreadyExists(TableDBError): pass class Cond: """This condition matches everything""" def __call__(self, s): return 1 class ExactCond(Cond): """Acts as an exact match condition function""" def __init__(self, strtomatch): self.strtomatch = strtomatch def __call__(self, s): return s == self.strtomatch class PrefixCond(Cond): """Acts as a condition function for matching a string prefix""" def __init__(self, prefix): self.prefix = prefix def __call__(self, s): return s[:len(self.prefix)] == self.prefix class LikeCond(Cond): """ Acts as a function that will match using an SQL 'LIKE' style string. Case insensitive and % signs are wild cards. This isn't perfect but it should work for the simple common cases. """ def __init__(self, likestr, re_flags=re.IGNORECASE): # escape python re characters chars_to_escape = '.*+()[]?' for char in chars_to_escape : likestr = string.replace(likestr, char, '\\'+char) # convert %s to wildcards self.likestr = string.replace(likestr, '%', '.*') self.re = re.compile('^'+self.likestr+'$', re_flags) def __call__(self, s): return self.re.match(s) # # keys used to store database metadata # _table_names_key = '__TABLE_NAMES__' # list of the tables in this db _columns = '._COLUMNS__' # table_name+this key contains a list of columns def _columns_key(table) : return table + _columns # # these keys are found within table sub databases # _data = '._DATA_.' # this+column+this+rowid key contains table data _rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each # row in the table. (no data is stored) _rowid_str_len = 8 # length in bytes of the unique rowid strings def _data_key(table, col, rowid) : return table + _data + col + _data + rowid def _search_col_data_key(table, col) : return table + _data + col + _data def _search_all_data_key(table) : return table + _data def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid def _search_rowid_key(table) : return table + _rowid def contains_metastrings(s) : """Verify that the given string does not contain any metadata strings that might interfere with dbtables database operation. """ if string.find(s, _table_names_key) >= 0 or \ string.find(s, _columns) >= 0 or \ string.find(s, _data) >= 0 or \ string.find(s, _rowid) >= 0 : return 1 else : return 0 class bsdTableDB : def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) : """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600) Open database name in the dbhome BerkeleyDB directory. Use keyword arguments when calling this constructor. """ myflags = DB_THREAD if create : myflags = myflags | DB_CREATE flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags if recover : flagsforenv = flagsforenv | DB_RECOVER self.env = DBEnv() self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance self.env.open(dbhome, myflags | flagsforenv) if truncate : myflags = myflags | DB_TRUNCATE self.db = DB(self.env) self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata] self.db.open(filename, DB_BTREE, myflags, mode) self.dbfilename = filename # Initialize the table names list if this is a new database if not self.db.has_key(_table_names_key) : self.db.put(_table_names_key, pickle.dumps([], 1)) # TODO verify more of the database's metadata? self.__tablecolumns = {} def __del__(self): self.close() def close(self): if self.db is not None: self.db.close() self.db = None if self.env is not None: self.env.close() self.env = None def checkpoint(self, mins=0): try: self.env.txn_checkpoint(mins) except DBIncompleteError: pass def sync(self): try: self.db.sync() except DBIncompleteError: pass def _db_print(self) : """Print the database to stdout for debugging""" print "******** Printing raw database for debugging ********" cur = self.db.cursor() try: key, data = cur.first() while 1 : print `{key: data}` next = cur.next() if next: key, data = next else: cur.close() return except DBNotFoundError: cur.close() def CreateTable(self, table, columns) : """CreateTable(table, columns) - Create a new table in the database raises TableDBError if it already exists or for other DB errors. """ assert type(columns) == type([]) txn = None try: # checking sanity of the table and column names here on # table creation will prevent problems elsewhere. if contains_metastrings(table) : raise ValueError, "bad table name: contains reserved metastrings" for column in columns : if contains_metastrings(column) : raise ValueError, "bad column name: contains reserved metastrings" columnlist_key = _columns_key(table) if self.db.has_key(columnlist_key) : raise TableAlreadyExists, "table already exists" txn = self.env.txn_begin() # store the table's column info self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn) # add the table name to the tablelist tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW)) tablelist.append(table) self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) txn.commit() txn = None except DBError, dberror: if txn : txn.abort() raise TableDBError, dberror[1] def ListTableColumns(self, table): """Return a list of columns in the given table. [] if the table doesn't exist. """ assert type(table) == type('') if contains_metastrings(table) : raise ValueError, "bad table name: contains reserved metastrings" columnlist_key = _columns_key(table) if not self.db.has_key(columnlist_key): return [] pickledcolumnlist = self.db.get(columnlist_key) if pickledcolumnlist: return pickle.loads(pickledcolumnlist) else: return [] def ListTables(self): """Return a list of tables in this database.""" pickledtablelist = self.db.get(_table_names_key) if pickledtablelist: return pickle.loads(pickledtablelist) else: return [] def CreateOrExtendTable(self, table, columns): """CreateOrExtendTable(table, columns) - Create a new table in the database. If a table of this name already exists, extend it to have any additional columns present in the given list as well as all of its current columns. """ assert type(columns) == type([]) try: self.CreateTable(table, columns) except TableAlreadyExists: # the table already existed, add any new columns txn = None try: columnlist_key = _columns_key(table) txn = self.env.txn_begin() # load the current column list oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW)) # create a hash table for fast lookups of column names in the loop below oldcolumnhash = {} for c in oldcolumnlist: oldcolumnhash[c] = c # create a new column list containing both the old and new column names newcolumnlist = copy.copy(oldcolumnlist) for c in columns: if not oldcolumnhash.has_key(c): newcolumnlist.append(c) # store the table's new extended column list if newcolumnlist != oldcolumnlist : # delete the old one first since we opened with DB_DUP self.db.delete(columnlist_key, txn) self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn) txn.commit() txn = None self.__load_column_info(table) except DBError, dberror: if txn: txn.abort() raise TableDBError, dberror[1] def __load_column_info(self, table) : """initialize the self.__tablecolumns dict""" # check the column names try: tcolpickles = self.db.get(_columns_key(table)) except DBNotFoundError: raise TableDBError, "unknown table: " + `table` if not tcolpickles: raise TableDBError, "unknown table: " + `table` self.__tablecolumns[table] = pickle.loads(tcolpickles) def __new_rowid(self, table, txn=None) : """Create a new unique row identifier""" unique = 0 while not unique : # Generate a random 64-bit row ID string # (note: this code has <64 bits of randomness # but it's plenty for our database id needs!) p = xdrlib.Packer() p.pack_int(int(whrandom.random()*2147483647)) p.pack_int(int(whrandom.random()*2147483647)) newid = p.get_buffer() # Guarantee uniqueness by adding this key to the database try: self.db.put(_rowid_key(table, newid), None, txn=txn, flags=DB_NOOVERWRITE) except DBKeyExistsError: pass else: unique = 1 return newid def Insert(self, table, rowdict) : """Insert(table, datadict) - Insert a new row into the table using the keys+values from rowdict as the column values. """ txn = None try: if not self.db.has_key(_columns_key(table)) : raise TableDBError, "unknown table" # check the validity of each column name if not self.__tablecolumns.has_key(table) : self.__load_column_info(table) for column in rowdict.keys() : if not self.__tablecolumns[table].count(column) : raise TableDBError, "unknown column: "+`column` # get a unique row identifier for this row rowid = self.__new_rowid(table) txn = self.env.txn_begin() # insert the row values into the table database for column, dataitem in rowdict.items() : # store the value self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) txn.commit() txn = None except DBError, dberror: if txn : txn.abort() self.db.delete(_rowid_key(table, rowid)) raise TableDBError, dberror[1] def Modify(self, table, conditions={}, mappings={}) : """Modify(table, conditions) - Modify in rows matching 'conditions' using mapping functions in 'mappings' * conditions is a dictionary keyed on column names containing condition functions expecting the data string as an argument and returning a boolean. * mappings is a dictionary keyed on column names containint condition functions expecting the data string as an argument and returning the new string for that column. """ try: matching_rowids = self.__Select(table, [], conditions) # modify only requested columns columns = mappings.keys() for rowid in matching_rowids.keys() : txn = None try: for column in columns : txn = self.env.txn_begin() # modify the requested column try: dataitem = self.db.get(_data_key(table, column, rowid), txn) self.db.delete(_data_key(table, column, rowid), txn) except DBNotFoundError: dataitem = None # XXXXXXX row key somehow didn't exist, assume no error dataitem = mappings[column](dataitem) if dataitem <> None: self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) txn.commit() txn = None except DBError, dberror: if txn : txn.abort() raise except DBError, dberror: raise TableDBError, dberror[1] def Delete(self, table, conditions={}) : """Delete(table, conditions) - Delete items matching the given conditions from the table. * conditions is a dictionary keyed on column names containing condition functions expecting the data string as an argument and returning a boolean. """ try: matching_rowids = self.__Select(table, [], conditions) # delete row data from all columns columns = self.__tablecolumns[table] for rowid in matching_rowids.keys() : txn = None try: txn = self.env.txn_begin() for column in columns : # delete the data key try: self.db.delete(_data_key(table, column, rowid), txn) except DBNotFoundError: pass # XXXXXXX column may not exist, assume no error try: self.db.delete(_rowid_key(table, rowid), txn) except DBNotFoundError: pass # XXXXXXX row key somehow didn't exist, assume no error txn.commit() txn = None except DBError, dberror: if txn : txn.abort() raise except DBError, dberror: raise TableDBError, dberror[1] def Select(self, table, columns, conditions={}) : """Select(table, conditions) - retrieve specific row data Returns a list of row column->value mapping dictionaries. * columns is a list of which column data to return. If columns is None, all columns will be returned. * conditions is a dictionary keyed on column names containing callable conditions expecting the data string as an argument and returning a boolean. """ try: if not self.__tablecolumns.has_key(table) : self.__load_column_info(table) if columns is None : columns = self.__tablecolumns[table] matching_rowids = self.__Select(table, columns, conditions) except DBError, dberror: raise TableDBError, dberror[1] # return the matches as a list of dictionaries return matching_rowids.values() def __Select(self, table, columns, conditions) : """__Select() - Used to implement Select and Delete (above) Returns a dictionary keyed on rowids containing dicts holding the row data for columns listed in the columns param that match the given conditions. * conditions is a dictionary keyed on column names containing callable conditions expecting the data string as an argument and returning a boolean. """ # check the validity of each column name if not self.__tablecolumns.has_key(table) : self.__load_column_info(table) if columns is None : columns = self.tablecolumns[table] for column in (columns + conditions.keys()) : if not self.__tablecolumns[table].count(column) : raise TableDBError, "unknown column: "+`column` # keyed on rows that match so far, containings dicts keyed on # column names containing the data for that row and column. matching_rowids = {} rejected_rowids = {} # keys are rowids that do not match # attempt to sort the conditions in such a way as to minimize full column lookups def cmp_conditions(atuple, btuple): a = atuple[1] b = btuple[1] if type(a) == type(b) : if isinstance(a, PrefixCond) and isinstance(b, PrefixCond): return cmp(len(b.prefix), len(a.prefix)) # longest prefix first if isinstance(a, LikeCond) and isinstance(b, LikeCond): return cmp(len(b.likestr), len(a.likestr)) # longest likestr first return 0 if isinstance(a, ExactCond): return -1 if isinstance(b, ExactCond): return 1 if isinstance(a, PrefixCond): return -1 if isinstance(b, PrefixCond): return 1 # leave all unknown condition callables alone as equals return 0 conditionlist = conditions.items() conditionlist.sort(cmp_conditions) # Apply conditions to column data to find what we want cur = self.db.cursor() column_num = -1 for column, condition in conditionlist : column_num = column_num + 1 searchkey = _search_col_data_key(table, column) # speedup: don't linear search columns within loop if column in columns : savethiscolumndata = 1 # save the data for return else : savethiscolumndata = 0 # data only used for selection try: key, data = cur.set_range(searchkey) while key[:len(searchkey)] == searchkey : # extract the rowid from the key rowid = key[-_rowid_str_len:] if not rejected_rowids.has_key(rowid) : # if no condition was specified or the condition # succeeds, add row to our match list. if not condition or condition(data) : # only create new entries in matcing_rowids on # the first pass, otherwise reject the # rowid as it must not have matched # the previous passes if column_num == 0 : if not matching_rowids.has_key(rowid) : matching_rowids[rowid] = {} if savethiscolumndata : matching_rowids[rowid][column] = data else : rejected_rowids[rowid] = rowid else : if matching_rowids.has_key(rowid) : del matching_rowids[rowid] rejected_rowids[rowid] = rowid key, data = cur.next() except DBError, dberror: if dberror[0] != DB_NOTFOUND : raise continue cur.close() # we're done selecting rows, garbage collect the reject list del rejected_rowids # extract any remaining desired column data from the # database for the matching rows. if len(columns) > 0 : for rowid, rowdata in matching_rowids.items() : for column in columns : if rowdata.has_key(column) : continue try: rowdata[column] = self.db.get(_data_key(table, column, rowid)) except DBError, dberror: if dberror[0] != DB_NOTFOUND : raise rowdata[column] = None # return the matches return matching_rowids def Drop(self, table) : """Remove an entire table from the database """ txn = None try: txn = self.env.txn_begin() # delete the column list self.db.delete(_columns_key(table), txn) cur = self.db.cursor(txn) # delete all keys containing this tables column and row info table_key = _search_all_data_key(table) while 1 : try: key, data = cur.set_range(table_key) except DBNotFoundError: break # only delete items in this table if key[:len(table_key)] != table_key : break cur.delete() # delete all rowids used by this table table_key = _search_rowid_key(table) while 1 : try: key, data = cur.set_range(table_key) except DBNotFoundError: break # only delete items in this table if key[:len(table_key)] != table_key : break cur.delete() cur.close() # delete the tablename from the table name list tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW)) try: tablelist.remove(table) except ValueError: pass # hmm, it wasn't there, oh well, that's what we want. self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) txn.commit() txn = None if self.__tablecolumns.has_key(table) : del self.__tablecolumns[table] except DBError, dberror: if txn : txn.abort() raise TableDBError, dberror[1]