chunk.py 5.31 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
"""Simple class to read IFF chunks.

An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
Format)) has the following structure:

+----------------+
| ID (4 bytes)   |
+----------------+
| size (4 bytes) |
+----------------+
| data           |
| ...            |
+----------------+

The ID is a 4-byte string which identifies the type of chunk.

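The size field (a 32-bit value, encoded using big-endian byte order by
default) gives the size of the chunk data, not counting the 8-byte
header.  For formats whose size field does count the header, pass
inclheader=True and the 8 header bytes are subtracted.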

Usually an IFF-type file consists of one or more chunks.  The proposed
usage of the Chunk class defined here is to instantiate an instance at
the start of each chunk and read from the instance until it reaches
the end, after which a new instance can be instantiated.  At the end
of the file, creating a new instance will fail with an EOFError
exception.

Usage:
while True:
    try:
        chunk = Chunk(file)
    except EOFError:
        break
    chunktype = chunk.getname()
    while True:
        data = chunk.read(nbytes)
        if not data:
            break
        # do something with data

The interface is file-like.  The implemented methods are:
read, close, seek, tell, isatty.
Extra methods are: skip() (called by close, skips to the end of the chunk),
getname() (returns the name (ID) of the chunk),
getsize() (returns the size of the chunk)

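The __init__ method has one required argument, a file-like object
(including a chunk instance), and three optional arguments: align, a
flag which specifies whether or not chunks are aligned on 2-byte
boundaries (default True, i.e. aligned); bigendian, a flag which
specifies whether the size field is big-endian (default True) or
little-endian; and inclheader, a flag which specifies whether the size
field counts the 8-byte header (default False).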
"""

class Chunk:
    """File-like reader for a single IFF-style chunk.

    An instance reads the 8-byte header (4-byte ID followed by a 4-byte
    size) from the underlying file when it is created and then exposes
    the chunk data through read(), seek(), tell(), skip() and close().

    Attributes set by __init__:
        closed     -- True once close() has run
        align      -- whether odd-sized chunks are followed by a pad byte
        file       -- the underlying file-like object
        chunkname  -- the 4 bytes read as the chunk ID
        chunksize  -- the chunk size (header subtracted if inclheader)
        size_read  -- number of bytes consumed so far (pad byte included)
        seekable   -- whether file.tell() worked on the underlying file
        offset     -- file position of the chunk data (only if seekable)
    """

    def __init__(self, file, align=True, bigendian=True, inclheader=False):
        """Read a chunk header from *file*.

        file       -- file-like object positioned at the start of a chunk
        align      -- if true, odd-sized chunks are padded to 2 bytes
        bigendian  -- if true the size field is big-endian, otherwise
                      little-endian
        inclheader -- if true the size field counts the 8-byte header,
                      which is subtracted from the stored chunk size

        Raises EOFError if fewer than 4 ID bytes or fewer than 4 size
        bytes can be read.
        """
        import struct
        self.closed = False
        self.align = align      # whether to align to word (2-byte) boundaries
        if bigendian:
            strflag = '>'
        else:
            strflag = '<'
        self.file = file
        self.chunkname = file.read(4)
        if len(self.chunkname) < 4:
            raise EOFError
        try:
            self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0]
        except struct.error:
            # A short read of the size field means the file is truncated.
            raise EOFError from None
        if inclheader:
            # NOTE(review): a size field below 8 makes chunksize negative
            # here; this is not validated.
            self.chunksize = self.chunksize - 8 # subtract header
        self.size_read = 0
        try:
            # Remember where the data starts so seek() can be absolute.
            self.offset = self.file.tell()
        except (AttributeError, OSError):
            self.seekable = False
        else:
            self.seekable = True

    def getname(self):
        """Return the name (ID) of the current chunk."""
        return self.chunkname

    def getsize(self):
        """Return the size of the current chunk."""
        return self.chunksize

    def close(self):
        """Skip to the end of the chunk and mark it closed.

        Calling close() on an already closed chunk does nothing.  The
        underlying file is not closed.
        """
        if not self.closed:
            try:
                self.skip()
            finally:
                # Mark closed even if skip() raised.
                self.closed = True

    def isatty(self):
        """Return False; raise ValueError if the chunk is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return False

    def seek(self, pos, whence=0):
        """Seek to specified position into the chunk.
        Default position is 0 (start of chunk).
        If the file is not seekable, this will result in an error.

        whence is 0 (relative to the start of the chunk), 1 (relative
        to the current position) or 2 (relative to the end of the chunk).

        Raises ValueError if the chunk is closed, OSError if the
        underlying file is not seekable, and RuntimeError if the
        resulting position lies outside the chunk.
        """

        if self.closed:
            raise ValueError("I/O operation on closed file")
        if not self.seekable:
            raise OSError("cannot seek")
        if whence == 1:
            pos = pos + self.size_read
        elif whence == 2:
            pos = pos + self.chunksize
        if pos < 0 or pos > self.chunksize:
            raise RuntimeError
        self.file.seek(self.offset + pos, 0)
        self.size_read = pos

    def tell(self):
        """Return the number of bytes consumed from the chunk so far.

        Raises ValueError if the chunk is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self.size_read

    def read(self, size=-1):
        """Read at most size bytes from the chunk.
        If size is omitted, None or negative, read until the end
        of the chunk.

        Returns b'' once the end of the chunk has been reached.
        Raises ValueError if the chunk is closed.
        """

        if self.closed:
            raise ValueError("I/O operation on closed file")
        if self.size_read >= self.chunksize:
            return b''
        remaining = self.chunksize - self.size_read
        # Accept None like other file-like read() methods instead of
        # failing on the comparison below; never read past the chunk end.
        if size is None or size < 0 or size > remaining:
            size = remaining
        data = self.file.read(size)
        self.size_read = self.size_read + len(data)
        if self.size_read == self.chunksize and \
           self.align and \
           (self.chunksize & 1):
            # Consume the pad byte that follows an odd-sized chunk so the
            # file is left at the start of the next chunk.
            dummy = self.file.read(1)
            self.size_read = self.size_read + len(dummy)
        return data

    def skip(self):
        """Skip the rest of the chunk.
        If you are not interested in the contents of the chunk,
        this method should be called so that the file points to
        the start of the next chunk.

        Raises ValueError if the chunk is closed, and EOFError if the
        data has to be read and the file ends before the chunk does.
        """

        if self.closed:
            raise ValueError("I/O operation on closed file")
        if self.seekable:
            try:
                n = self.chunksize - self.size_read
                # maybe fix alignment
                if self.align and (self.chunksize & 1):
                    n = n + 1
                self.file.seek(n, 1)
                self.size_read = self.size_read + n
                return
            except OSError:
                # Fall back to reading the remaining data below.
                pass
        while self.size_read < self.chunksize:
            n = min(8192, self.chunksize - self.size_read)
            dummy = self.read(n)
            if not dummy:
                raise EOFError