# Module 'parser'
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
# See notes at end of file.

# Character classes used by the tokenizer.
# 'operators' always form a one-character token of their own;
# 'separators' are all characters that end a plain (unquoted) token.
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'

# Tokenize a string.
# Return a list of tokens (strings), in the order they occur in s.
#
# - whitespace is skipped;
# - ';' starts a comment: the rest of the string is ignored;
# - '"' starts a string token, which runs up to and including the
#   closing '"' (a backslash escapes the character following it);
#   an unterminated string simply extends to the end of s;
# - each character in 'operators' is a token by itself;
# - anything else is a plain token extending up to the next separator.
def tokenize_string(s):
	tokens = []
	n = len(s)
	pos = 0
	# Scan with an index rather than re-slicing s for every character.
	while pos < n:
		c = s[pos]
		if c in whitespace:
			pos = pos + 1
		elif c == ';':
			# Comment: nothing more to tokenize in this string.
			break
		elif c == '"':
			end = pos + 1
			while end < n:
				ch = s[end]
				end = end + 1
				if ch == '"': break
				# Skip the escaped character.  This may step past the
				# end of s; the slices below cope with that.
				if ch == '\\': end = end + 1
			tokens.append(s[pos:end])
			pos = end
		elif c in operators:
			tokens.append(c)
			pos = pos + 1
		else:
			end = pos + 1
			while end < n and s[end] not in separators:
				end = end + 1
			tokens.append(s[pos:end])
			pos = end
	return tokens

# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
# The file is read with readline() until it returns an empty result,
# and each line is tokenized separately by tokenize_string().
def tokenize_file(fp):
	tokens = []
	while 1:
		line = fp.readline()
		if not line: break
		# Extend in place; 'tokens = tokens + ...' would copy the whole
		# list again for every line read.
		tokens.extend(tokenize_string(line))
	return tokens

# Exception raised by parse_expr.
# This is an exception class rather than a string, since string
# exceptions are no longer supported.  The lower-case name is kept
# so that existing 'except syntax_error:' clauses keep working.
class syntax_error(Exception):
	pass

# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression
# (nested lists for nested expressions, strings for all other tokens),
# and tokens contains the remaining tokens.
# Raises syntax_error('expected "("') if the input is empty or does not
# start with '(', and syntax_error('missing ")"') if the tokens run out
# before the expression is closed.
def parse_expr(tokens):
	if (not tokens) or tokens[0] != '(':
		raise syntax_error('expected "("')
	expr = []	# the list currently being filled
	enclosing = []	# stack of still-open outer lists
	n = len(tokens)
	pos = 1
	# Iterative rather than recursive, so the nesting depth of the
	# input is not limited by the interpreter's recursion limit.
	while pos < n:
		tok = tokens[pos]
		pos = pos + 1
		if tok == '(':
			subexpr = []
			expr.append(subexpr)
			enclosing.append(expr)
			expr = subexpr
		elif tok == ')':
			if not enclosing:
				# Closed the outermost expression: done.
				return expr, tokens[pos:]
			expr = enclosing.pop()
		else:
			expr.append(tok)
	raise syntax_error('missing ")"')

# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level,
# in the order they occur in the file.
# The whole file is tokenized first (tokenize_file), then parse_expr
# is applied repeatedly until no tokens are left; any exception
# raised by parse_expr is passed on to the caller.
def parse_file(fp):
	tokens = tokenize_file(fp)
	exprlist = []
	while tokens:
		expr, tokens = parse_expr(tokens)
		exprlist.append(expr)
	return exprlist

# The input
#	'(hip (hop hur-ray))'
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]

# Comments start with semicolon (;) and continue till the end of the line.
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.