# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.
# Python-3 transition stanza: emit a DeprecationWarning when run under
# `python -3` (warnpy3k exists only in Python 2.6+); this module was
# dropped entirely in Python 3.0.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k  # keep the module namespace clean


# Character classes of the S-expression grammar.
whitespace = ' \t\n'   # separates tokens, never part of one
operators = '()\''   # each is a complete single-character token
separators = operators + whitespace + ';' + '"'   # chars that end a bare atom


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    """Split S-expression source *s* into a list of string tokens.

    Token kinds:
      - '(' , ')' and the quote character are one-character tokens;
      - a double-quoted string is one token, quotes included, with
        backslash escaping the following character;
      - any other run of characters up to the next separator is an atom;
      - whitespace is discarded;
      - a semicolon starts a comment running to the end of the line.
    """
    # Character classes (these mirror the module-level definitions).
    blanks = ' \t\n'
    ops = '()\''
    seps = ops + blanks + ';' + '"'
    tokens = []
    while s:
        c = s[:1]
        if c in blanks:
            s = s[1:]
        elif c == ';':
            # Comment: skip to the end of the current line only, as the
            # module documentation promises.  (The old code dropped the
            # remainder of the *whole* string, which lost the following
            # lines when tokenizing multi-line input directly; behavior
            # through tokenize_file(), which feeds one line at a time,
            # is unchanged.)
            nl = s.find('\n')
            if nl < 0:
                s = ''
            else:
                s = s[nl+1:]
        elif c == '"':
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1   # skip the escaped character
            tokens.append(s[:i])        # token keeps both quote marks
            s = s[i:]
        elif c in ops:
            tokens.append(c)
            s = s[1:]
        else:
            # Bare atom: everything up to the next separator.
            n = len(s)
            i = 1
            while i < n:
                if s[i] in seps: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    """Read *fp* line by line and return the concatenated token list.

    Tokenizing one line at a time keeps `;' comments confined to the
    line they start on.
    """
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        # extend() instead of `tokens = tokens + ...' avoids quadratic
        # list copying on large inputs; the result is identical.
        tokens.extend(tokenize_string(line))
    return tokens


# Exception raised by parse_expr().
# Historically this was the string exception 'syntax error'; raising a
# string is a TypeError on Python 2.6+ (the version this module targets,
# see the warnpy3k call above), so use an Exception subclass instead.
# The lowercase name is kept for backward compatibility: both
# `raise syntax_error, 'msg'` and `except syntax_error:` work unchanged.
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
    if (not tokens) or tokens[0] != '(':
        raise syntax_error, 'expected "("'
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error, 'missing ")"'
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]


# Parse a file (given as file object, not as file name).
#
def parse_file(fp):
    """Return the list of top-level S-expressions found in *fp*."""
    remaining = tokenize_file(fp)
    exprs = []
    while remaining:
        expr, remaining = parse_expr(remaining)
        exprs.append(expr)
    return exprs


# EXAMPLE:
#
# The input
#       '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#       ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#       [['hip', ['hop', 'hur-ray']]]
#
#
# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#       ( ) '
#
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.