# fileinput.py
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin.  To specify an alternative list of
filenames, pass it as the argument to input().  A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode.  If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines.  Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline().  A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- fileno()
- isatty()
- read(), read(size), even readlines()

"""

import sys, os, stat

# Names exported by "from fileinput import *".
__all__ = ["input","close","nextfile","filename","lineno","filelineno",
           "isfirstline","isstdin","FileInput"]

# The FileInput instance created by the most recent input() call, or None
# when there is none; the module-level functions delegate to it.
_state = None

# Size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024

def input(files=None, inplace=0, backup="", bufsize=0):
    """Create the module-wide FileInput instance and return it.

    files   -- a single filename, a sequence of filenames, or None to use
               sys.argv[1:]
    inplace -- true to enable in-place filtering
    backup  -- extension for the backup file kept by in-place filtering
    bufsize -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    if _state and _state._file:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state

def close():
    """Close the module-wide input sequence, if there is one.

    The global state is cleared before the instance is closed, so a
    later input() call can start afresh.  Calling close() when nothing
    is active is a no-op.
    """
    global _state
    state = _state
    _state = None
    if state:
        state.close()

def nextfile():
    """Call nextfile() on the module-wide input sequence and return its result.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()

def filename():
    """Return filename() of the module-wide input sequence.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()

def lineno():
    """Return lineno() of the module-wide input sequence.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()

def filelineno():
    """Return filelineno() of the module-wide input sequence.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()

def isfirstline():
    """Return isfirstline() of the module-wide input sequence.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()

def isstdin():
    """Return isstdin() of the module-wide input sequence.

    Raises RuntimeError if no input() sequence is active.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()

class FileInput:
    """Read lines from a list of files as if they were one stream.

    Constructor arguments:

    files   -- a single filename, a sequence of filenames, or None to use
               sys.argv[1:]; an empty list means standard input ('-')
    inplace -- if true, each file is renamed to a backup and sys.stdout
               is redirected to a new file under the original name
    backup  -- extension of the backup file; when empty, ".bak" is used
               and the backup is removed when the file is finished
    bufsize -- size hint passed to readlines(); 0 selects DEFAULT_BUFSIZE

    Lines are obtained with readline() or by indexing the instance with
    consecutive integers starting at 0 (which is what a for loop does).
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # isinstance() also accepts str subclasses; an exact type()
        # comparison would turn such a name into a tuple of characters.
        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files                 # names still to be opened
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None             # sys.stdout saved during in-place mode
        self._output = None                 # in-place output file
        self._filename = None
        self._lineno = 0                    # cumulative line count
        self._filelineno = 0                # line count within the current file
        self._file = None                   # currently open input file
        self._isstdin = 0
        self._backupfilename = None
        self._buffer = []                   # lines read ahead by readlines()
        self._bufindex = 0                  # index of the next unread buffer line

    def __del__(self):
        self.close()

    def close(self):
        """Finish the current file and discard any files not yet opened."""
        self.nextfile()
        self._files = ()

    def __getitem__(self, i):
        """Return the next line; i must equal the number of lines read so far.

        Raises RuntimeError on out-of-order access and IndexError at end
        of input, which ends a for loop over the instance.
        """
        # Fast path: serve a buffered line.  The index is deliberately not
        # checked here; the order check only runs when the buffer is empty.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Close the current file so the next read starts with the next file.

        Restores sys.stdout and closes the output file if in-place mode
        redirected it, closes the input file unless it is sys.stdin,
        removes the backup file when no backup extension was requested,
        and drops any buffered lines that were not yet returned.
        """
        savestdout = self._savestdout
        self._savestdout = 0
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = 0
        if output:
            output.close()

        current = self._file
        self._file = 0
        if current and not self._isstdin:
            current.close()

        backupfilename = self._backupfilename
        self._backupfilename = 0
        if backupfilename and not self._backup:
            # Best effort: a backup that cannot be removed is left behind.
            try:
                os.unlink(backupfilename)
            except os.error:
                pass

        self._isstdin = 0
        self._buffer = []
        self._bufindex = 0

    def readline(self):
        """Return the next line of input, or "" when all files are exhausted.

        May raise IOError (or OSError from the in-place rename) when the
        next file cannot be opened.
        """
        # A loop rather than a recursive call: each refill or empty file
        # would otherwise add a stack frame, so a long run of empty files
        # could exceed the recursion limit.
        while 1:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted (or empty); move on.
                self.nextfile()

    def _open_next(self):
        """Open the first remaining file and make it the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = 0
        if self._filename == '-':
            # In-place filtering is never applied to standard input.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = 1
            return
        if not self._inplace:
            # This may raise IOError
            self._file = open(self._filename, "r")
            return
        self._backupfilename = self._filename + (self._backup or ".bak")
        # Remove a stale backup so the rename below can succeed.
        try:
            os.unlink(self._backupfilename)
        except os.error:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno())[stat.ST_MODE]
        except (AttributeError, os.error):
            # Permissions unavailable: fall back to a plain open().
            self._output = open(self._filename, "w")
        else:
            # Recreate the file with the permission bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                os.chmod(self._filename, perm)
            except (AttributeError, os.error):
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines returned so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines returned from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if the last line returned was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin

def _test():
    """Command-line self-test: print every input line with its counters.

    Options: -i turns on in-place filtering, -b EXT sets the backup
    extension.  Remaining arguments are the files to read.  Each line is
    printed as "<lineno>: <filename>[<filelineno>]<*> <text>", with "*"
    marking the first line of a file.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i': inplace = 1
        if o == '-b': backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage return.
        if line[-1:] == '\n': line = line[:-1]
        if line[-1:] == '\r': line = line[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        # A single parenthesised argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    _test()