"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.

import os.path
import sys

__all__ = ["shlex"]

class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer reads one character at a time from ``instream`` and groups
    the characters into tokens.  Its behaviour is controlled by public
    attributes that may be changed after construction:

    commenters -- characters that start a comment running to end of line
    wordchars  -- characters that accumulate into multi-character words
    whitespace -- characters that separate tokens and are skipped
    quotes     -- characters that open/close a quoted string
    source     -- if set, a token equal to this value makes the lexer
                  treat the following token as a file to include
    debug      -- verbosity of diagnostic output (0 is silent)

    Internal bookkeeping, also readable by callers:

    state      -- ' ' between tokens, 'a' inside a word, a quote character
                  inside a quoted string, None once end of input was seen
    pushback   -- list of tokens to hand out before reading more input
    filestack  -- saved (infile, instream, lineno) tuples of suspended
                  input sources, innermost first
    lineno     -- current line number in the active input source
    token      -- the token currently being accumulated
    """

    def __init__(self, instream=None, infile=None):
        """Create a lexer reading from instream (default: sys.stdin).

        infile is the name reported in error messages and used to resolve
        relative inclusions; it is ignored when instream is not given.
        """
        # Compare against None so that a stream object that happens to be
        # falsy is still honoured.
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        self.whitespace = ' \t\r\n'
        self.quotes = '\'"'
        self.state = ' '
        self.pushback = []
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack = []
        self.source = None

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.insert(0, tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        The current source and its line number are saved and reading
        continues from newstream, starting again at line 1.
        """
        self.filestack.insert(0, (self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream and resumes the most recently suspended
        source.  Raises IndexError if no source is suspended.
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.pop(0)
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        # The popped source hit end of file (state None); the resumed
        # source must start scanning between tokens again.
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Returns the empty string when every input source is exhausted.
        """
        if self.pushback:
            tok = self.pushback.pop(0)
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions: the token after the source keyword names the
        # file to read from next.
        while raw == self.source:
            spec = self.sourcehook(self.read_token())
            if spec:
                (newfile, newstream) = spec
                self.push_source(newstream, newfile)
            raw = self.get_token()
        # Maybe we got EOF instead?  Fall back to the enclosing source.
        while raw == "":
            if len(self.filestack) == 0:
                return ""
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read a token from the input stream (no pushback or inclusions).

        Returns the empty string at end of input.  Raises ValueError if
        the input ends inside a quoted string.
        """
        while 1:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state %s I see character: %s"
                      % (repr(self.state), repr(nextchar)))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token:
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # readline() swallows the newline, so count it here.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # The state remembers which quote must close the string.
                    self.token = nextchar
                    self.state = nextchar
                else:
                    # Any other character is a one-character token; it is
                    # never empty here because EOF was handled above.
                    self.token = nextchar
                    break   # emit current token
            elif self.state in self.quotes:
                self.token = self.token + nextchar
                if nextchar == self.state:
                    self.state = ' '
                    break
                elif not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # NOTE: the comment is discarded but the lexer stays in
                    # word state, so the word continues on the next line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars or nextchar in self.quotes:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; hand it out as the next
                    # token via the pushback stack.
                    self.pushback.insert(0, nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips surrounding double quotes from newfile, resolves a relative
        name against the directory of the current input file (when that
        is known by name), and returns a (filename, open stream) tuple.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        infile and lineno default to the lexer's current input file name
        and line number; explicit values (including 0) are used as given.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

198

Tim Peters's avatar
Tim Peters committed
199
if __name__ == '__main__':
    # Demo driver: tokenize the file named on the command line (or
    # standard input when no argument is given) and print each token.
    if len(sys.argv) == 1:
        source_stream = None
        lexer = shlex()
    else:
        source_name = sys.argv[1]
        source_stream = open(source_name)
        lexer = shlex(source_stream, source_name)
    try:
        while 1:
            tt = lexer.get_token()
            if not tt:
                break
            print("Token: " + repr(tt))
    finally:
        # Only close what this driver opened; never close sys.stdin.
        if source_stream is not None:
            source_stream.close()