dbtables.py 25 KB
Newer Older
1 2 3
#-----------------------------------------------------------------------
#
# Copyright (C) 2000, 2001 by Autonomous Zone Industries
4
# Copyright (C) 2002 Gregory P. Smith
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# License:      This is free software.  You may use this software for any
#               purpose including modification/redistribution, so long as
#               this header remains intact and that you do not claim any
#               rights of ownership or authorship of this software.  This
#               software has been tested, but no warranty is expressed or
#               implied.
#
#   --  Gregory P. Smith <greg@electricrain.com>

# This provides a simple database table interface built on top of
# the Python BerkeleyDB 3 interface.
#
_cvsid = '$Id$'

import re
21
import sys
22
import copy
23
import struct
24
import random
25 26
from types import ListType, StringType
import cPickle as pickle
27

28
try:
29 30 31
    # For Pythons w/distutils pybsddb
    from bsddb3.db import *
except ImportError:
32 33
    # For Python 2.3
    from bsddb.db import *
34

35 36 37 38 39 40
# XXX(nnorwitz): is this correct? DBIncompleteError is conditional in _bsddb.c
try:
    DBIncompleteError
except NameError:
    class DBIncompleteError(Exception):
        pass
41

42 43 44 45
class TableDBError(StandardError):
    pass
class TableAlreadyExists(TableDBError):
    pass
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66


class Cond:
    """This condition matches everything"""
    def __call__(self, s):
        return 1

class ExactCond(Cond):
    """Acts as an exact match condition function"""
    def __init__(self, strtomatch):
        self.strtomatch = strtomatch
    def __call__(self, s):
        return s == self.strtomatch

class PrefixCond(Cond):
    """Acts as a condition function for matching a string prefix"""
    def __init__(self, prefix):
        self.prefix = prefix
    def __call__(self, s):
        return s[:len(self.prefix)] == self.prefix

67 68 69 70 71 72 73
class PostfixCond(Cond):
    """Acts as a condition function for matching a string postfix"""
    def __init__(self, postfix):
        self.postfix = postfix
    def __call__(self, s):
        return s[-len(self.postfix):] == self.postfix

74 75 76 77 78 79 80 81 82 83
class LikeCond(Cond):
    """
    Acts as a function that will match using an SQL 'LIKE' style
    string.  Case insensitive and % signs are wild cards.
    This isn't perfect but it should work for the simple common cases.
    """
    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # escape python re characters
        chars_to_escape = '.*+()[]?'
        for char in chars_to_escape :
84
            likestr = likestr.replace(char, '\\'+char)
85
        # convert %s to wildcards
86
        self.likestr = likestr.replace('%', '.*')
87 88 89 90 91 92 93 94 95
        self.re = re.compile('^'+self.likestr+'$', re_flags)
    def __call__(self, s):
        return self.re.match(s)

#
# keys used to store database metadata
#
_table_names_key = '__TABLE_NAMES__'  # list of the tables in this db
_columns = '._COLUMNS__'  # table_name+this key contains a list of columns
96 97 98

def _columns_key(table):
    return table + _columns
99 100 101 102 103 104 105 106

#
# these keys are found within table sub databases
#
_data =  '._DATA_.'  # this+column+this+rowid key contains table data
_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
                     # row in the table.  (no data is stored)
_rowid_str_len = 8   # length in bytes of the unique rowid strings
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121

def _data_key(table, col, rowid):
    return table + _data + col + _data + rowid

def _search_col_data_key(table, col):
    return table + _data + col + _data

def _search_all_data_key(table):
    return table + _data

def _rowid_key(table, rowid):
    return table + _rowid + rowid + _rowid

def _search_rowid_key(table):
    return table + _rowid
122 123 124 125 126

def contains_metastrings(s) :
    """Verify that the given string does not contain any
    metadata strings that might interfere with dbtables database operation.
    """
127 128 129 130 131
    if (s.find(_table_names_key) >= 0 or
        s.find(_columns) >= 0 or
        s.find(_data) >= 0 or
        s.find(_rowid) >= 0):
        # Then
132
        return 1
133
    else:
134 135 136 137
        return 0


class bsdTableDB :
138
    def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
139
                 recover=0, dbflags=0):
140 141
        """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)

142 143 144
        Open database name in the dbhome BerkeleyDB directory.
        Use keyword arguments when calling this constructor.
        """
145
        self.db = None
146
        myflags = DB_THREAD
147 148 149 150 151 152 153 154 155 156
        if create:
            myflags |= DB_CREATE
        flagsforenv = (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
                       DB_INIT_TXN | dbflags)
        # DB_AUTO_COMMIT isn't a valid flag for env.open()
        try:
            dbflags |= DB_AUTO_COMMIT
        except AttributeError:
            pass
        if recover:
157 158
            flagsforenv = flagsforenv | DB_RECOVER
        self.env = DBEnv()
159 160
        # enable auto deadlock avoidance
        self.env.set_lk_detect(DB_LOCK_DEFAULT)
161
        self.env.open(dbhome, myflags | flagsforenv)
162 163
        if truncate:
            myflags |= DB_TRUNCATE
164
        self.db = DB(self.env)
Gregory P. Smith's avatar
Gregory P. Smith committed
165 166 167
        # this code relies on DBCursor.set* methods to raise exceptions
        # rather than returning None
        self.db.set_get_returns_none(1)
168 169 170
        # allow duplicate entries [warning: be careful w/ metadata]
        self.db.set_flags(DB_DUP)
        self.db.open(filename, DB_BTREE, dbflags | myflags, mode)
171 172
        self.dbfilename = filename
        # Initialize the table names list if this is a new database
173 174 175 176 177 178 179 180 181 182
        txn = self.env.txn_begin()
        try:
            if not self.db.has_key(_table_names_key, txn):
                self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn)
        # Yes, bare except
        except:
            txn.abort()
            raise
        else:
            txn.commit()
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
        # TODO verify more of the database's metadata?
        self.__tablecolumns = {}

    def __del__(self):
        self.close()

    def close(self):
        if self.db is not None:
            self.db.close()
            self.db = None
        if self.env is not None:
            self.env.close()
            self.env = None

    def checkpoint(self, mins=0):
        try:
            self.env.txn_checkpoint(mins)
        except DBIncompleteError:
            pass

    def sync(self):
        try:
            self.db.sync()
        except DBIncompleteError:
            pass

    def _db_print(self) :
        """Print the database to stdout for debugging"""
        print "******** Printing raw database for debugging ********"
        cur = self.db.cursor()
        try:
            key, data = cur.first()
215
            while 1:
216
                print repr({key: data})
217 218 219 220 221 222 223 224 225 226
                next = cur.next()
                if next:
                    key, data = next
                else:
                    cur.close()
                    return
        except DBNotFoundError:
            cur.close()


227
    def CreateTable(self, table, columns):
228 229
        """CreateTable(table, columns) - Create a new table in the database.

230 231
        raises TableDBError if it already exists or for other DB errors.
        """
232
        assert isinstance(columns, ListType)
233 234 235 236
        txn = None
        try:
            # checking sanity of the table and column names here on
            # table creation will prevent problems elsewhere.
237 238 239
            if contains_metastrings(table):
                raise ValueError(
                    "bad table name: contains reserved metastrings")
240
            for column in columns :
241 242 243
                if contains_metastrings(column):
                    raise ValueError(
                        "bad column name: contains reserved metastrings")
244 245

            columnlist_key = _columns_key(table)
246
            if self.db.has_key(columnlist_key):
247 248 249 250 251 252 253
                raise TableAlreadyExists, "table already exists"

            txn = self.env.txn_begin()
            # store the table's column info
            self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)

            # add the table name to the tablelist
254 255
            tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn,
                                                 flags=DB_RMW))
256
            tablelist.append(table)
257 258
            # delete 1st, in case we opened with DB_DUP
            self.db.delete(_table_names_key, txn)
259 260 261 262 263
            self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)

            txn.commit()
            txn = None
        except DBError, dberror:
264
            if txn:
265 266 267 268 269
                txn.abort()
            raise TableDBError, dberror[1]


    def ListTableColumns(self, table):
270 271
        """Return a list of columns in the given table.
        [] if the table doesn't exist.
272
        """
273 274
        assert isinstance(table, StringType)
        if contains_metastrings(table):
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
            raise ValueError, "bad table name: contains reserved metastrings"

        columnlist_key = _columns_key(table)
        if not self.db.has_key(columnlist_key):
            return []
        pickledcolumnlist = self.db.get(columnlist_key)
        if pickledcolumnlist:
            return pickle.loads(pickledcolumnlist)
        else:
            return []

    def ListTables(self):
        """Return a list of tables in this database."""
        pickledtablelist = self.db.get(_table_names_key)
        if pickledtablelist:
            return pickle.loads(pickledtablelist)
        else:
            return []

    def CreateOrExtendTable(self, table, columns):
295 296
        """CreateOrExtendTable(table, columns)

297 298
        Create a new table in the database.

299 300 301 302
        If a table of this name already exists, extend it to have any
        additional columns present in the given list as well as
        all of its current columns.
        """
303
        assert isinstance(columns, ListType)
304 305 306 307 308 309 310 311 312 313
        try:
            self.CreateTable(table, columns)
        except TableAlreadyExists:
            # the table already existed, add any new columns
            txn = None
            try:
                columnlist_key = _columns_key(table)
                txn = self.env.txn_begin()

                # load the current column list
314 315 316 317
                oldcolumnlist = pickle.loads(
                    self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
                # create a hash table for fast lookups of column names in the
                # loop below
318 319 320 321
                oldcolumnhash = {}
                for c in oldcolumnlist:
                    oldcolumnhash[c] = c

322 323
                # create a new column list containing both the old and new
                # column names
324 325 326 327 328 329 330 331 332
                newcolumnlist = copy.copy(oldcolumnlist)
                for c in columns:
                    if not oldcolumnhash.has_key(c):
                        newcolumnlist.append(c)

                # store the table's new extended column list
                if newcolumnlist != oldcolumnlist :
                    # delete the old one first since we opened with DB_DUP
                    self.db.delete(columnlist_key, txn)
333 334 335
                    self.db.put(columnlist_key,
                                pickle.dumps(newcolumnlist, 1),
                                txn=txn)
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352

                txn.commit()
                txn = None

                self.__load_column_info(table)
            except DBError, dberror:
                if txn:
                    txn.abort()
                raise TableDBError, dberror[1]


    def __load_column_info(self, table) :
        """initialize the self.__tablecolumns dict"""
        # check the column names
        try:
            tcolpickles = self.db.get(_columns_key(table))
        except DBNotFoundError:
353
            raise TableDBError, "unknown table: %r" % (table,)
354
        if not tcolpickles:
355
            raise TableDBError, "unknown table: %r" % (table,)
356 357
        self.__tablecolumns[table] = pickle.loads(tcolpickles)

358
    def __new_rowid(self, table, txn) :
359 360
        """Create a new unique row identifier"""
        unique = 0
361
        while not unique:
362
            # Generate a random 64-bit row ID string
363
            # (note: might have <64 bits of true randomness
364
            # but it's plenty for our database id needs!)
365 366
            blist = []
            for x in xrange(_rowid_str_len):
367
                blist.append(random.randint(0,255))
368
            newid = struct.pack('B'*_rowid_str_len, *blist)
369 370 371

            # Guarantee uniqueness by adding this key to the database
            try:
372 373 374
                self.db.put(_rowid_key(table, newid), None, txn=txn,
                            flags=DB_NOOVERWRITE)
            except DBKeyExistError:
375 376 377 378 379 380 381 382 383 384 385 386 387
                pass
            else:
                unique = 1

        return newid


    def Insert(self, table, rowdict) :
        """Insert(table, datadict) - Insert a new row into the table
        using the keys+values from rowdict as the column values.
        """
        txn = None
        try:
388
            if not self.db.has_key(_columns_key(table)):
389 390 391
                raise TableDBError, "unknown table"

            # check the validity of each column name
392
            if not self.__tablecolumns.has_key(table):
393 394
                self.__load_column_info(table)
            for column in rowdict.keys() :
395
                if not self.__tablecolumns[table].count(column):
396
                    raise TableDBError, "unknown column: %r" % (column,)
397 398 399

            # get a unique row identifier for this row
            txn = self.env.txn_begin()
400
            rowid = self.__new_rowid(table, txn=txn)
401 402

            # insert the row values into the table database
403
            for column, dataitem in rowdict.items():
404 405 406 407 408 409 410
                # store the value
                self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)

            txn.commit()
            txn = None

        except DBError, dberror:
411 412 413 414 415 416
            # WIBNI we could just abort the txn and re-raise the exception?
            # But no, because TableDBError is not related to DBError via
            # inheritance, so it would be backwards incompatible.  Do the next
            # best thing.
            info = sys.exc_info()
            if txn:
417 418
                txn.abort()
                self.db.delete(_rowid_key(table, rowid))
419
            raise TableDBError, dberror[1], info[2]
420 421


422
    def Modify(self, table, conditions={}, mappings={}):
423 424 425 426 427 428 429 430 431
        """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'

        * table - the table name
        * conditions - a dictionary keyed on column names containing
          a condition callable expecting the data string as an
          argument and returning a boolean.
        * mappings - a dictionary keyed on column names containing a
          condition callable expecting the data string as an argument and
          returning the new string for that column.
432 433 434 435 436 437
        """
        try:
            matching_rowids = self.__Select(table, [], conditions)

            # modify only requested columns
            columns = mappings.keys()
438
            for rowid in matching_rowids.keys():
439 440
                txn = None
                try:
441
                    for column in columns:
442 443 444
                        txn = self.env.txn_begin()
                        # modify the requested column
                        try:
445 446
                            dataitem = self.db.get(
                                _data_key(table, column, rowid),
447
                                txn=txn)
448 449 450
                            self.db.delete(
                                _data_key(table, column, rowid),
                                txn)
451
                        except DBNotFoundError:
452 453 454
                             # XXXXXXX row key somehow didn't exist, assume no
                             # error
                            dataitem = None
455 456
                        dataitem = mappings[column](dataitem)
                        if dataitem <> None:
457 458 459
                            self.db.put(
                                _data_key(table, column, rowid),
                                dataitem, txn=txn)
460 461 462
                        txn.commit()
                        txn = None

463 464
                # catch all exceptions here since we call unknown callables
                except:
465
                    if txn:
466 467 468 469 470 471
                        txn.abort()
                    raise

        except DBError, dberror:
            raise TableDBError, dberror[1]

472
    def Delete(self, table, conditions={}):
473 474
        """Delete(table, conditions) - Delete items matching the given
        conditions from the table.
475 476 477 478

        * conditions - a dictionary keyed on column names containing
          condition functions expecting the data string as an
          argument and returning a boolean.
479 480 481 482 483 484
        """
        try:
            matching_rowids = self.__Select(table, [], conditions)

            # delete row data from all columns
            columns = self.__tablecolumns[table]
485
            for rowid in matching_rowids.keys():
486 487 488
                txn = None
                try:
                    txn = self.env.txn_begin()
489
                    for column in columns:
490 491
                        # delete the data key
                        try:
492 493
                            self.db.delete(_data_key(table, column, rowid),
                                           txn)
494
                        except DBNotFoundError:
495 496
                            # XXXXXXX column may not exist, assume no error
                            pass
497 498 499 500

                    try:
                        self.db.delete(_rowid_key(table, rowid), txn)
                    except DBNotFoundError:
501 502
                        # XXXXXXX row key somehow didn't exist, assume no error
                        pass
503 504 505
                    txn.commit()
                    txn = None
                except DBError, dberror:
506
                    if txn:
507 508 509 510 511 512
                        txn.abort()
                    raise
        except DBError, dberror:
            raise TableDBError, dberror[1]


513
    def Select(self, table, columns, conditions={}):
514
        """Select(table, columns, conditions) - retrieve specific row data
515
        Returns a list of row column->value mapping dictionaries.
516 517

        * columns - a list of which column data to return.  If
518
          columns is None, all columns will be returned.
519
        * conditions - a dictionary keyed on column names
520 521 522 523
          containing callable conditions expecting the data string as an
          argument and returning a boolean.
        """
        try:
524
            if not self.__tablecolumns.has_key(table):
525
                self.__load_column_info(table)
526
            if columns is None:
527 528 529 530 531 532 533 534
                columns = self.__tablecolumns[table]
            matching_rowids = self.__Select(table, columns, conditions)
        except DBError, dberror:
            raise TableDBError, dberror[1]
        # return the matches as a list of dictionaries
        return matching_rowids.values()


535
    def __Select(self, table, columns, conditions):
536 537 538 539 540 541 542 543 544
        """__Select() - Used to implement Select and Delete (above)
        Returns a dictionary keyed on rowids containing dicts
        holding the row data for columns listed in the columns param
        that match the given conditions.
        * conditions is a dictionary keyed on column names
        containing callable conditions expecting the data string as an
        argument and returning a boolean.
        """
        # check the validity of each column name
545
        if not self.__tablecolumns.has_key(table):
546
            self.__load_column_info(table)
547
        if columns is None:
548
            columns = self.tablecolumns[table]
549 550
        for column in (columns + conditions.keys()):
            if not self.__tablecolumns[table].count(column):
551
                raise TableDBError, "unknown column: %r" % (column,)
552 553 554 555

        # keyed on rows that match so far, containings dicts keyed on
        # column names containing the data for that row and column.
        matching_rowids = {}
556 557
        # keys are rowids that do not match
        rejected_rowids = {}
558

559 560
        # attempt to sort the conditions in such a way as to minimize full
        # column lookups
561 562 563
        def cmp_conditions(atuple, btuple):
            a = atuple[1]
            b = btuple[1]
564
            if type(a) is type(b):
565
                if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
566 567
                    # longest prefix first
                    return cmp(len(b.prefix), len(a.prefix))
568
                if isinstance(a, LikeCond) and isinstance(b, LikeCond):
569 570
                    # longest likestr first
                    return cmp(len(b.likestr), len(a.likestr))
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
                return 0
            if isinstance(a, ExactCond):
                return -1
            if isinstance(b, ExactCond):
                return 1
            if isinstance(a, PrefixCond):
                return -1
            if isinstance(b, PrefixCond):
                return 1
            # leave all unknown condition callables alone as equals
            return 0

        conditionlist = conditions.items()
        conditionlist.sort(cmp_conditions)

        # Apply conditions to column data to find what we want
        cur = self.db.cursor()
        column_num = -1
589
        for column, condition in conditionlist:
590 591 592
            column_num = column_num + 1
            searchkey = _search_col_data_key(table, column)
            # speedup: don't linear search columns within loop
593
            if column in columns:
594
                savethiscolumndata = 1  # save the data for return
595
            else:
596 597 598 599
                savethiscolumndata = 0  # data only used for selection

            try:
                key, data = cur.set_range(searchkey)
600
                while key[:len(searchkey)] == searchkey:
601 602 603
                    # extract the rowid from the key
                    rowid = key[-_rowid_str_len:]

604
                    if not rejected_rowids.has_key(rowid):
605 606
                        # if no condition was specified or the condition
                        # succeeds, add row to our match list.
607 608
                        if not condition or condition(data):
                            if not matching_rowids.has_key(rowid):
609
                                matching_rowids[rowid] = {}
610
                            if savethiscolumndata:
611
                                matching_rowids[rowid][column] = data
612 613
                        else:
                            if matching_rowids.has_key(rowid):
614 615 616 617 618 619
                                del matching_rowids[rowid]
                            rejected_rowids[rowid] = rowid

                    key, data = cur.next()

            except DBError, dberror:
620
                if dberror[0] != DB_NOTFOUND:
621 622 623 624 625 626 627 628 629 630
                    raise
                continue

        cur.close()

        # we're done selecting rows, garbage collect the reject list
        del rejected_rowids

        # extract any remaining desired column data from the
        # database for the matching rows.
631 632 633 634
        if len(columns) > 0:
            for rowid, rowdata in matching_rowids.items():
                for column in columns:
                    if rowdata.has_key(column):
635 636
                        continue
                    try:
637 638
                        rowdata[column] = self.db.get(
                            _data_key(table, column, rowid))
639
                    except DBError, dberror:
640
                        if dberror[0] != DB_NOTFOUND:
641 642 643 644 645 646 647
                            raise
                        rowdata[column] = None

        # return the matches
        return matching_rowids


648 649
    def Drop(self, table):
        """Remove an entire table from the database"""
650 651 652 653 654 655 656 657 658 659 660
        txn = None
        try:
            txn = self.env.txn_begin()

            # delete the column list
            self.db.delete(_columns_key(table), txn)

            cur = self.db.cursor(txn)

            # delete all keys containing this tables column and row info
            table_key = _search_all_data_key(table)
661
            while 1:
662 663 664 665 666
                try:
                    key, data = cur.set_range(table_key)
                except DBNotFoundError:
                    break
                # only delete items in this table
667
                if key[:len(table_key)] != table_key:
668 669 670 671 672
                    break
                cur.delete()

            # delete all rowids used by this table
            table_key = _search_rowid_key(table)
673
            while 1:
674 675 676 677 678
                try:
                    key, data = cur.set_range(table_key)
                except DBNotFoundError:
                    break
                # only delete items in this table
679
                if key[:len(table_key)] != table_key:
680 681 682 683 684 685
                    break
                cur.delete()

            cur.close()

            # delete the tablename from the table name list
686 687
            tablelist = pickle.loads(
                self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
688 689 690
            try:
                tablelist.remove(table)
            except ValueError:
691 692 693 694
                # hmm, it wasn't there, oh well, that's what we want.
                pass
            # delete 1st, incase we opened with DB_DUP
            self.db.delete(_table_names_key, txn)
695 696 697 698 699
            self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)

            txn.commit()
            txn = None

700
            if self.__tablecolumns.has_key(table):
701 702 703
                del self.__tablecolumns[table]

        except DBError, dberror:
704
            if txn:
705 706
                txn.abort()
            raise TableDBError, dberror[1]