"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object.  The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle.  This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects.  The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raises
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = d.has_key(key)   # true if the key exists; same as "key in d"
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it

Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry.  This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever.  To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open.  When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close().  This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""
58

59 60 61
# Try using cPickle and cStringIO if available.

try:
Tim Peters's avatar
Tim Peters committed
62
    from cPickle import Pickler, Unpickler
63
except ImportError:
Tim Peters's avatar
Tim Peters committed
64
    from pickle import Pickler, Unpickler
65 66

try:
Tim Peters's avatar
Tim Peters committed
67
    from cStringIO import StringIO
68
except ImportError:
Tim Peters's avatar
Tim Peters committed
69
    from StringIO import StringIO
70

71
import UserDict
72
import warnings
73

74
__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
75

76
class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before being stored in the underlying object and
    unpickled when read back; keys are passed through unchanged.
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        """Wrap the dictionary-like object 'dict'.

        protocol  -- pickle protocol used when storing values (default 0).
        writeback -- if true, keep every entry that is read or written in
                     an in-memory cache and write the cached entries back
                     on sync() and close().
        binary    -- deprecated; when given, int(binary) is used as the
                     protocol and a PendingDeprecationWarning is issued.

        Raises ValueError if both 'protocol' and 'binary' are given.
        """
        self.dict = dict
        if protocol is not None and binary is not None:
            raise ValueError("can't specify both 'protocol' and 'binary'")
        if binary is not None:
            warnings.warn("The 'binary' argument to Shelf() is deprecated",
                          PendingDeprecationWarning)
            protocol = int(binary)
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}

    def keys(self):
        """Return the keys of the underlying dictionary-like object."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying object."""
        return len(self.dict)

    def has_key(self, key):
        """Return true if 'key' is present in the underlying object."""
        return self.dict.has_key(key)

    def __contains__(self, key):
        """Support "key in shelf"; same test as has_key()."""
        return self.dict.has_key(key)

    def get(self, key, default=None):
        """Return the unpickled value for 'key', or 'default' if absent."""
        if self.dict.has_key(key):
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the object stored at 'key'.

        A cached object is returned when there is one; otherwise the
        stored pickle is loaded, and the result is remembered in the
        cache only when writeback is enabled.  A missing key raises
        whatever the underlying object raises for self.dict[key].
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle 'value' with the configured protocol and store it."""
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete 'key' from the underlying object and from the cache."""
        del self.dict[key]
        try:
            del self.cache[key]
        except KeyError:
            # The entry was never cached; nothing more to remove.
            pass

    def close(self):
        """Write back cached entries, then close the underlying object.

        Afterwards self.dict is set to 0, so the shelf can no longer be
        used.  Calling close() again is harmless.
        """
        self.sync()
        try:
            self.dict.close()
        except AttributeError:
            # The underlying object has no close() method (this is also
            # the case for the 0 placeholder left by an earlier close()).
            # Any other error raised while closing is allowed to
            # propagate instead of being silently discarded.
            pass
        self.dict = 0

    def __del__(self):
        """Close the shelf when it is garbage collected."""
        if not hasattr(self, 'writeback'):
            # __init__ did not run to completion, so there is nothing
            # that close() could safely sync or close.
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the underlying object.

        With writeback enabled, every cached entry is re-pickled and
        stored, and the cache is then emptied.  If the underlying object
        has a sync() method it is called as well.
        """
        if self.writeback and self.cache:
            # Switch writeback off while storing so that __setitem__ does
            # not modify the cache that is being iterated over.
            self.writeback = False
            try:
                for key, entry in self.cache.iteritems():
                    self[key] = entry
            finally:
                # Restore the flag even if a store fails; otherwise the
                # shelf would silently stop caching from then on.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()

160

161
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    On top of the regular shelf interface this class offers first(),
    next(), previous(), last() and set_location(), which have no
    counterpart in [g]dbm databases.

    The database itself must be opened by the caller with one of the
    "bsddb" modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen
    or bsddb.rnopen) and handed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        # No extra state is needed; everything is forwarded to Shelf.
        Shelf.__init__(self, dict, protocol, writeback, binary)

    def set_location(self, key):
        """Call the database's set_location(key); return (key, object).

        The value of the pair returned by the database is unpickled.
        """
        found_key, pickled = self.dict.set_location(key)
        return (found_key, Unpickler(StringIO(pickled)).load())

    def next(self):
        """Call the database's next(); return (key, unpickled object)."""
        found_key, pickled = self.dict.next()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def previous(self):
        """Call the database's previous(); return (key, unpickled object)."""
        found_key, pickled = self.dict.previous()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def first(self):
        """Call the database's first(); return (key, unpickled object)."""
        found_key, pickled = self.dict.first()
        return (found_key, Unpickler(StringIO(pickled)).load())

    def last(self):
        """Call the database's last(); return (key, unpickled object)."""
        found_key, pickled = self.dict.last()
        return (found_key, Unpickler(StringIO(pickled)).load())
201 202 203


class DbfilenameShelf(Shelf):
    """Shelf implementation using the "anydbm" generic dbm interface.

    The constructor takes the filename of the dbm database rather than
    an already opened database object.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
        # anydbm is imported here, so it is only loaded once a shelf of
        # this kind is actually created.
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback, binary)
213

214

215
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  As a side-effect, an extension may be added to the
    filename and more than one file may be created.  The optional flag
    parameter has the same interpretation as the flag parameter of
    anydbm.open(). The optional protocol parameter specifies the
    version of the pickle protocol (0, 1, or 2).

    The optional writeback parameter, when true, makes the shelf cache
    the entries that are accessed and write them back on sync() and
    close().

    The optional binary parameter is deprecated and may be set to True
    to force the use of binary pickles for serializing data values.

    See the module's __doc__ string for an overview of the interface.
    """

    # Pass protocol and binary through in their own positions.  The
    # previous call put binary in the protocol slot and dropped protocol,
    # so an explicit protocol was silently ignored.
    return DbfilenameShelf(filename, flag, protocol, writeback, binary)