"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object.  The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle.  This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects.  The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = d.has_key(key)   # true if the key exists; same as "key in d"
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it

Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry.  This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever.  To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open.  When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close().  This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""

# Try using cPickle and cStringIO if available; otherwise fall back to
# the pickle and StringIO modules, which export the same names.

try:
    from cPickle import Pickler, Unpickler
except ImportError:
    from pickle import Pickler, Unpickler

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

import UserDict
import warnings

# Names exported by "from shelve import *".
__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]


class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before they are stored in the wrapped mapping and
    unpickled when they are read back.  With writeback=True every entry
    that is read or assigned is also kept in self.cache, and sync()
    (called by close()) pickles all cached entries back into the mapping.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        """Wrap the dictionary-like object *dict*.

        protocol is the pickle protocol used when storing values; None
        selects protocol 0.  writeback turns on the cache of accessed
        entries.
        """
        self.dict = dict
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}

    def keys(self):
        """Return the keys of the wrapped mapping."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the wrapped mapping."""
        return len(self.dict)

    def has_key(self, key):
        """Return whether the wrapped mapping has *key*."""
        return self.dict.has_key(key)

    def __contains__(self, key):
        """Support "key in shelf"; same answer as has_key()."""
        return self.dict.has_key(key)

    def get(self, key, default=None):
        """Return the value stored at *key*, or *default* if absent."""
        if self.dict.has_key(key):
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value for *key*.

        A cached object is returned as-is; otherwise the stored pickle
        is loaded (and remembered in the cache when writeback is on).
        Raises KeyError if the wrapped mapping has no such key.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it at *key*."""
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()
        # Cache only after the store succeeded, so that a value which
        # cannot be pickled never ends up in the cache without a
        # matching entry in the mapping.
        if self.writeback:
            self.cache[key] = value

    def __delitem__(self, key):
        """Delete *key* from the mapping and from the cache."""
        del self.dict[key]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def close(self):
        """Write back cached entries, close the mapping and drop it.

        The wrapped mapping is closed (if it has a close() method) and
        self.dict is reset even when sync() raises; the exception from
        sync() still propagates to the caller.
        """
        try:
            self.sync()
        finally:
            try:
                try:
                    self.dict.close()
                except AttributeError:
                    # The mapping has no close() (e.g. a plain dict), or
                    # the shelf was already closed and self.dict is 0.
                    pass
            finally:
                self.dict = 0
                # Nothing can be written back any more; emptying the
                # cache keeps a later close() (e.g. from __del__) from
                # retrying the write-back against the dropped mapping.
                self.cache = {}

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            return
        self.close()

    def sync(self):
        """Write cached entries back, then sync the wrapped mapping.

        The cache is emptied once every entry has been stored.  If
        storing an entry raises, the exception propagates, the cache is
        left in place and the writeback flag is restored.
        """
        if self.writeback and self.cache:
            # Turn writeback off while storing so that __setitem__ does
            # not modify the cache that is being iterated over.
            self.writeback = False
            try:
                for key, entry in self.cache.iteritems():
                    self[key] = entry
            finally:
                # Restore the flag even on failure; otherwise a pickling
                # error here would silently disable writeback for good.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()

class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    On top of the Shelf interface this provides first(), next(),
    previous(), last() and set_location(), which have no counterpart
    in [g]dbm databases.  Each of them forwards to the method of the
    same name on the underlying database and returns a (key, value)
    pair with the value unpickled.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        Shelf.__init__(self, dict, protocol, writeback)

    def _unpickle_item(self, item):
        # Turn a (key, pickled value) pair into (key, value).
        key, pickled = item
        stream = StringIO(pickled)
        return (key, Unpickler(stream).load())

    def set_location(self, key):
        return self._unpickle_item(self.dict.set_location(key))

    def next(self):
        return self._unpickle_item(self.dict.next())

    def previous(self):
        return self._unpickle_item(self.dict.previous())

    def first(self):
        return self._unpickle_item(self.dict.first())

    def last(self):
        return self._unpickle_item(self.dict.last())


class DbfilenameShelf(Shelf):
    """Shelf implementation using the "anydbm" generic dbm interface.

    The constructor takes the filename of the dbm database and opens it
    itself with anydbm.open(filename, flag).
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # Imported here rather than at module level, as in the original.
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)


def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename for the underlying database; as a
    side-effect, an extension may be added to it and more than one file
    may be created.  The optional flag parameter is interpreted the same
    way as the flag parameter of anydbm.open().  The optional protocol
    parameter specifies the version of the pickle protocol (0, 1, or 2).

    Returns a DbfilenameShelf built from the arguments.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf