r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()      # ready for writing
f = StringIO(buf)   # ready for reading
f.close()           # explicitly release resources held
flag = f.isatty()   # always false
pos = f.tell()      # get current position
f.seek(pos)         # set current position
f.seek(pos, mode)   # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()      # read until EOF
buf = f.read(n)     # read up to n bytes
buf = f.readline()  # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.truncate([size])  # truncate file to at most size (default: current pos)
f.write(buf)        # write at current position
f.writelines(list)  # for line in list: f.write(line)
f.getvalue()        # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
31
try:
    from errno import EINVAL
except ImportError:
    # No errno module available: fall back to the conventional numeric
    # value so truncate() can still report "invalid argument".
    EINVAL = 22
35

36 37
# Public API: the only name exported by a star-import of this module.
__all__ = ["StringIO"]

38 39 40 41
def _complain_ifclosed(closed):
    """Raise ValueError if *closed* is true; otherwise do nothing.

    Shared guard: callers pass their ``closed`` flag before performing an
    operation that is only valid on an open file.
    """
    if closed:
        # Call form of raise: identical behaviour to the two-expression
        # ``raise Class, value`` statement, but valid on every Python version.
        raise ValueError("I/O operation on closed file")

42
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Implementation notes:
    - self.buf holds the consolidated contents; self.buflist collects
      pieces written since the last consolidation and is joined onto
      self.buf lazily by the reading/seeking methods and getvalue().
    - self.len is the logical length of the file (self.buf plus pending
      pieces) and self.pos is the current position.
    """
    def __init__(self, buf = ''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def __iter__(self):
        """Return self: the object is its own line iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Pending writes that were never consolidated into self.buf
            # live in self.buflist; drop them too so the memory is really
            # released.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.

        An empty string is returned only when EOF is encountered immediately
        (or when a length of 0 is requested).

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # length == 0 is a valid limit and must yield an empty string;
        # only None or a negative length means "no limit".
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Record the real length: when size exceeds the current contents
        # the slice above keeps everything, so storing size itself would
        # make self.len disagree with the buffer.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        Writing past the current end of the file first pads the gap with
        null bytes. There is no return value.
        """
        _complain_ifclosed(self.closed)
        # Nothing to do for an empty string. The check runs before the
        # str() coercion below, so any other false object is skipped too.
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF just queues the piece.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is beyond EOF: fill the gap with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: consolidate, then splice s between
            # the text before the position and the text after the
            # overwritten region.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing; this only checks that the file is open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
274 275 276 277 278


# A little test suite

def test():
    """Exercise StringIO against the contents of a real text file.

    The file is named by the first command-line argument and defaults to
    /etc/passwd. Its lines are written into a StringIO, then write, seek,
    read, readline, readlines and truncate are checked against each other.
    Progress is printed to standard output; RuntimeError is raised on the
    first mismatch.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file once as lines and once as a whole, and close the
    # handle explicitly instead of leaking it.
    fp = open(path, 'r')
    try:
        lines = fp.readlines()
        fp.seek(0)
        text = fp.read()
    finally:
        fp.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    # Single-argument print(...) produces the same output under both the
    # print statement and the print function.
    print('File length = %d' % length)
    # Overwrite the second line with itself; the contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %r' % (f.readline(),))
    print('Position = %d' % f.tell())
    line = f.readline()
    print('Second line = %r' % (line,))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    rest = f.readlines()
    line = rest[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %d more lines' % len(rest))
    print('File length = %d' % f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer regardless of the
    # interpreter's division semantics.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %d' % f.tell())
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()
322 323

if __name__ == '__main__':
    # Run the self-test when this module is executed as a script.
    test()