"""A readline()-style interface to the parts of a multipart message.

The MultiFile class makes each part of a multipart message "feel" like
an ordinary file, as long as you use fp.readline().  Allows recursive
use, for nested multipart messages.  Probably best used together
with module mimetools.

Suggested use:

real_fp = open(...)
fp = MultiFile(real_fp)

"read some lines from fp"
fp.push(separator)
while 1:
        "read lines from fp until it returns an empty string" (A)
        if not fp.next(): break
fp.pop()
"read remaining lines from fp until it returns an empty string"

The latter sequence may be used recursively at (A).
It is also allowed to use multiple push()...pop() sequences.

If seekable is given as 0, the class code will not do the bookkeeping
it normally attempts in order to make seeks relative to the beginning of the
current file part.  This may be useful when using MultiFile with a non-
seekable stream object.
"""

# Public names exported by a star-import of this module.
__all__ = ["MultiFile", "Error"]

class Error(Exception):
    """Raised by MultiFile for invalid calls or malformed multipart input."""

class MultiFile:
    """Present each part of a multipart message as a readline()-style file.

    Boundary strings are registered with push() and removed with pop().
    While at least one boundary is active, readline() returns '' as soon
    as the underlying stream yields a section divider or end marker for
    the innermost boundary, so the current part looks like a file that
    has reached EOF.  next() then advances to the following part.

    When constructed with a true ``seekable`` argument the instance also
    tracks stream offsets so that tell() and seek() work relative to the
    start of the current part.
    """

    # Class-level default; __init__ overrides it per instance when the
    # caller asks for seek bookkeeping.
    seekable = 0

    def __init__(self, fp, seekable=1):
        """Wrap the stream ``fp``.

        fp       -- object providing readline() (and tell()/seek() when
                    ``seekable`` is true).
        seekable -- when true, record stream offsets so tell()/seek()
                    can be used; when false no offsets are ever read.
        """
        self.fp = fp
        # Active boundary strings, innermost last.
        self.stack = []
        # 0 while inside a part.  After a boundary line it is the number
        # of stack entries (counted from the innermost) that the line
        # terminates; after EOF it is len(self.stack).
        self.level = 0
        # True when the part ended on an end marker or on EOF.
        self.last = 0
        if seekable:
            self.seekable = 1
            # Stream offset at which the current part begins.
            self.start = self.fp.tell()
            # Saved ``start`` values of the enclosing parts.
            self.posstack = []

    def tell(self):
        """Return the current offset relative to the start of the part.

        Once the end of the part has been reached, this is the offset of
        the end of the part.  Relies on the bookkeeping that is only
        done for instances created with a true ``seekable`` argument.
        """
        if self.level > 0:
            return self.lastpos
        return self.fp.tell() - self.start

    def seek(self, pos, whence=0):
        """Reposition within the current part.

        pos    -- target offset.
        whence -- 0: relative to the start of the part (also used for
                  any value other than 1 or 2); 1: relative to the
                  current position; 2: relative to the end of the part,
                  which is only known once the end has been reached.

        Raises Error if whence is 2 before the end of the part has been
        reached, or if the resulting offset is negative or lies past
        the current position.  Like tell(), this relies on the seek
        bookkeeping being enabled.
        """
        here = self.tell()
        if whence:
            if whence == 1:
                pos = pos + here
            elif whence == 2:
                if self.level > 0:
                    pos = pos + self.lastpos
                else:
                    raise Error("can't use whence=2 yet")
        if not 0 <= pos <= here or \
                        self.level > 0 and pos > self.lastpos:
            raise Error('bad MultiFile.seek() call')
        self.fp.seek(pos + self.start)
        # Any successful seek puts us back inside the part.
        self.level = 0
        self.last = 0

    def readline(self):
        """Return the next line of the current part, or '' at its end.

        A line is the end of the part when, with trailing whitespace
        removed, it equals the section divider or the end marker of one
        of the active boundaries.  Lines that start with '--' but match
        no active boundary are returned unchanged.

        Raises Error if the stream ends while a boundary is still
        active, or if the matching boundary is not the innermost one
        (i.e. an inner end marker is missing).
        """
        if self.level > 0:
            return ''
        line = self.fp.readline()
        # Real EOF?
        if not line:
            self.level = len(self.stack)
            self.last = (self.level > 0)
            if self.last:
                if self.seekable:
                    # Record where the part ended so that tell() and
                    # pop() have a valid offset after the error.
                    self.lastpos = self.fp.tell() - self.start
                raise Error('sudden EOF in MultiFile.readline()')
            return ''
        assert self.level == 0
        # Fast check to see if this is just data
        if self.is_data(line):
            return line
        # Ignore trailing whitespace on marker lines
        marker = line.rstrip()
        # Try to match a boundary, innermost first.
        # Return the line (unstripped) if nothing matches.
        for i, sep in enumerate(reversed(self.stack)):
            if marker == self.section_divider(sep):
                self.last = 0
                break
            elif marker == self.end_marker(sep):
                self.last = 1
                break
        else:
            return line
        # We only get here if we see a section divider or EOM line
        if self.seekable:
            # The part ends just before the boundary line itself.
            self.lastpos = self.tell() - len(line)
        self.level = i + 1
        if self.level > 1:
            raise Error('Missing endmarker in MultiFile.readline()')
        return ''

    def readlines(self):
        """Return the remaining lines of the current part as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
        return lines

    def read(self): # Note: no size argument -- read until EOF only!
        """Return the remainder of the current part as a single string."""
        return ''.join(self.readlines())

    def next(self):
        """Skip to the start of the next part.

        Returns 1 when positioned at the start of another part, and 0
        when the current part ended on an end marker, on EOF, or on a
        boundary belonging to an enclosing level.
        """
        # Discard whatever is left of the current part.
        while self.readline():
            pass
        if self.level > 1 or self.last:
            return 0
        self.level = 0
        self.last = 0
        if self.seekable:
            self.start = self.fp.tell()
        return 1

    def push(self, sep):
        """Activate the boundary string ``sep`` as the innermost boundary.

        Raises Error when called after the end of the current part has
        already been reached.
        """
        if self.level > 0:
            raise Error('bad MultiFile.push() call')
        self.stack.append(sep)
        if self.seekable:
            self.posstack.append(self.start)
            self.start = self.fp.tell()

    def pop(self):
        """Deactivate the innermost boundary.

        Raises Error when no boundary is active.
        """
        if self.stack == []:
            raise Error('bad MultiFile.pop() call')
        if self.level <= 1:
            self.last = 0
        elif self.seekable:
            # Offsets are only tracked for seekable instances; convert
            # the end-of-part offset to an absolute stream position so
            # it can be re-based on the enclosing part's start below.
            abslastpos = self.lastpos + self.start
        self.level = max(0, self.level - 1)
        self.stack.pop()
        if self.seekable:
            self.start = self.posstack.pop()
            if self.level > 0:
                self.lastpos = abslastpos - self.start

    def is_data(self, line):
        """Return true if ``line`` cannot be a boundary (no leading '--')."""
        return line[:2] != '--'

    def section_divider(self, str):
        """Return the line that separates two parts for boundary ``str``."""
        return "--" + str

    def end_marker(self, str):
        """Return the line that closes the last part for boundary ``str``."""
        return "--" + str + "--"