Commit be7c45ee, authored by Guido van Rossum

New address parser by Ben Escoto replaces Sjoerd Mullender's parseaddr().
parent e6c128f4
...@@ -221,27 +221,24 @@ class Message: ...@@ -221,27 +221,24 @@ class Message:
# ('Guido van Rossum', 'guido@cwi.nl'). # ('Guido van Rossum', 'guido@cwi.nl').
def getaddr(self, name): def getaddr(self, name):
try: # New, by Ben Escoto
data = self[name] alist = self.getaddrlist(name)
except KeyError: if alist:
return None, None return alist[0]
return parseaddr(data) else:
return (None, None)
# Retrieve a list of addresses from a header, where each # Retrieve a list of addresses from a header, where each
# address is a tuple as returned by getaddr(). # address is a tuple as returned by getaddr().
def getaddrlist(self, name): def getaddrlist(self, name):
# XXX This function is not really correct. The split # New, by Ben Escoto
# on ',' might fail in the case of commas within
# quoted strings.
try: try:
data = self[name] data = self[name]
except KeyError: except KeyError:
return [] return []
data = string.splitfields(data, ',') a = AddrlistClass(data)
for i in range(len(data)): return a.getaddrlist()
data[i] = parseaddr(data[i])
return data
# Retrieve a date field from a header as a tuple compatible # Retrieve a date field from a header as a tuple compatible
# with time.mktime(). # with time.mktime().
...@@ -291,7 +288,7 @@ class Message: ...@@ -291,7 +288,7 @@ class Message:
# Utility functions # Utility functions
# ----------------- # -----------------
# XXX Should fix these to be really conformant. # XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful. # XXX The inverses of the parse functions may also be useful.
...@@ -306,12 +303,7 @@ def unquote(str): ...@@ -306,12 +303,7 @@ def unquote(str):
return str return str
# Parse an address into (name, address) tuple # Add quotes around a string.
# (By Sjoerd Mullender)
error = 'parseaddr.error'
specials = re.compile(r'[][()<>,.;:@\" \000-\037\177-\377]')
def quote(str): def quote(str):
return '"%s"' % string.join( return '"%s"' % string.join(
...@@ -322,114 +314,244 @@ def quote(str): ...@@ -322,114 +314,244 @@ def quote(str):
'"'), '"'),
'\\"') '\\"')
# External interface to parse an address
def parseaddr(address): def parseaddr(address):
token = [] # the current token a = AddrlistClass(address)
tokens = [] # the list of tokens list = a.getaddrlist()
backslash = 0 if not list:
dquote = 0 return (None, None)
was_quoted = 0 else:
space = 0 return list[0]
paren = 0
for c in address:
if backslash: # Address parser class by Ben Escoto
token.append(c)
backslash = 0 class AddrlistClass:
if c == '\\':
backslash = 1 def __init__(self, field):
was_quoted = 1
continue self.specials = '()<>@,:;.\"[]'
if dquote: self.pos = 0
if c == '"': self.LWS = ' \t'
dquote = 0 self.CR = '\r'
else: self.atomends = self.specials + self.LWS + self.CR
token.append(c)
continue self.field = field
if c == '"': self.commentlist = []
dquote = 1
was_quoted = 1
continue def gotonext(self):
if paren:
if c == '(': while self.pos < len(self.field):
paren = paren + 1 if self.field[self.pos] in self.LWS + '\n\r':
elif c == ')': self.pos = self.pos + 1
paren = paren - 1 elif self.field[self.pos] == '(':
if paren == 0: self.commentlist.append(self.getcomment())
token = string.join(token, '') else: break
tokens.append((2, token))
token = [] def getaddrlist(self):
continue
token.append(c) ad = self.getaddress()
continue if ad:
if c == '(': return ad + self.getaddrlist()
paren = 1 else: return []
token = string.join(token, '')
tokens.append((was_quoted, token)) def getaddress(self):
was_quoted = 0 self.commentlist = []
token = [] self.gotonext()
continue
if c in string.whitespace: oldpos = self.pos
space = 1 oldcl = self.commentlist
continue plist = self.getphraselist()
if c in '<>@,;:.[]':
token = string.join(token, '') self.gotonext()
tokens.append((was_quoted, token)) returnlist = []
was_quoted = 0
token = [] if self.pos >= len(self.field):
tokens.append((0, c)) # Bad email address technically, no domain.
space = 0 if plist:
continue returnlist = [(string.join(self.commentlist), plist[0])]
if space:
token = string.join(token, '') elif self.field[self.pos] in '.@':
tokens.append((was_quoted, token)) # email address is just an addrspec
was_quoted = 0 # this isn't very efficient since we start over
token = [] self.pos = oldpos
space = 0 self.commentlist = oldcl
token.append(c) addrspec = self.getaddrspec()
token = string.join(token, '') returnlist = [(string.join(self.commentlist), addrspec)]
tokens.append((was_quoted, token))
if (0, '<') in tokens: elif self.field[self.pos] == ':':
name = [] # address is a group
addr = [] returnlist = []
cur = name
for token in tokens: self.pos = self.pos + 1
if token[1] == '': while self.pos < len(self.field):
continue self.gotonext()
if token == (0, '<'): if self.field[self.pos] == ';':
if addr: self.pos = self.pos + 1
raise error, 'syntax error' break
cur = addr returnlist = returnlist + self.getaddress()
elif token == (0, '>'):
if cur is not addr: elif self.field[self.pos] == '<':
raise error, 'syntax error' # Address is a phrase then a route addr
cur = name routeaddr = self.getrouteaddr()
elif token[0] == 2:
if cur is name: if self.commentlist:
name.append('(' + token[1] + ')') returnlist = [(string.join(plist) + ' (' + \
else: string.join(self.commentlist) + ')', routeaddr)]
name.append(token[1]) else: returnlist = [(string.join(plist), routeaddr)]
elif token[0] == 1 and cur is addr:
if specials.search(token[1]):
cur.append(quote(token[1]))
else:
cur.append(token[1])
else:
cur.append(token[1])
else: else:
name = [] if plist:
addr = [] returnlist = [(string.join(self.commentlist), plist[0])]
for token in tokens:
if token[1] == '': self.gotonext()
continue if self.pos < len(self.field) and self.field[self.pos] == ',':
if token[0] == 2: self.pos = self.pos + 1
name.append(token[1]) return returnlist
elif token[0] == 1:
if specials.search(token[1]):
addr.append(quote(token[1])) def getrouteaddr(self):
else: # This just skips all the route stuff and returns the addrspec
addr.append(token[1]) if self.field[self.pos] != '<':
else: return
addr.append(token[1])
return string.join(name, ' '), string.join(addr, '') expectroute = 0
self.pos = self.pos + 1
self.gotonext()
while self.pos < len(self.field):
if expectroute:
self.getdomain()
expectroute = 0
elif self.field[self.pos] == '>':
self.pos = self.pos + 1
break
elif self.field[self.pos] == '@':
self.pos = self.pos + 1
expectroute = 1
elif self.field[self.pos] == ':':
self.pos = self.pos + 1
expectaddrspec = 1
else:
adlist = self.getaddrspec()
self.pos = self.pos + 1
break
self.gotonext()
return adlist
def getaddrspec(self):
    # Parse an addr-spec (local-part, optionally followed by "@" and a
    # domain) starting at self.pos, advancing self.pos past what is
    # consumed.  Returns the address as a single string; when no "@"
    # follows the local part, only the local part is returned.
    # NOTE(review): a quoted local part is appended exactly as getquote()
    # returns it -- confirm whether callers expect the quotes restored.
    aslist = []

    self.gotonext()
    while self.pos < len(self.field):
        if self.field[self.pos] == '.':
            aslist.append('.')
            self.pos = self.pos + 1
        elif self.field[self.pos] == '"':
            aslist.append(self.getquote())
        elif self.field[self.pos] in self.atomends:
            # Any other special (including "@") ends the local part.
            break
        else:
            aslist.append(self.getatom())
        # Skip whitespace and comments between local-part tokens.
        self.gotonext()

    if self.pos >= len(self.field) or self.field[self.pos] != '@':
        return ''.join(aslist)

    aslist.append('@')
    self.pos = self.pos + 1
    self.gotonext()
    # str.join replaces string.join(), which newer Pythons no longer have.
    return ''.join(aslist) + self.getdomain()
def getdomain(self):
    # Collect the domain part of an address starting at self.pos and
    # return it as one string.  Linear whitespace is dropped, comments
    # are moved to self.commentlist, and scanning stops (without
    # consuming) at the first atom-ending character that is not ".",
    # "(" or "[".
    sdlist = []
    while self.pos < len(self.field):
        if self.field[self.pos] in self.LWS:
            self.pos = self.pos + 1
        elif self.field[self.pos] == '(':
            self.commentlist.append(self.getcomment())
        elif self.field[self.pos] == '[':
            sdlist.append(self.getdomainliteral())
        elif self.field[self.pos] == '.':
            self.pos = self.pos + 1
            sdlist.append('.')
        elif self.field[self.pos] in self.atomends:
            break
        else:
            sdlist.append(self.getatom())
    # str.join replaces string.join(), which newer Pythons no longer have.
    return ''.join(sdlist)
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    # Read a delimited fragment starting at self.pos and return its
    # contents without the delimiters.
    #
    # beginchar     -- character the fragment must start with; if the
    #                  current character differs (or the field is
    #                  exhausted), '' is returned and self.pos is left
    #                  untouched.
    # endchars      -- any character in this string ends the fragment
    #                  and is consumed.
    # allowcomments -- when true, a "(" inside the fragment starts a
    #                  nested comment whose text is spliced into the
    #                  result.
    #
    # A backslash makes the following character literal; the backslash
    # itself is not kept.
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = []
    escaped = 0
    self.pos = self.pos + 1
    while self.pos < len(self.field):
        if escaped == 1:
            slist.append(self.field[self.pos])
            escaped = 0
        elif self.field[self.pos] in endchars:
            self.pos = self.pos + 1
            break
        elif allowcomments and self.field[self.pos] == '(':
            slist.append(self.getcomment())
            # getcomment() already moved self.pos past the nested
            # comment; advancing again would drop the next character.
            continue
        elif self.field[self.pos] == '\\':
            escaped = 1
        else:
            slist.append(self.field[self.pos])
        self.pos = self.pos + 1

    # str.join replaces string.join(), which newer Pythons no longer have.
    return ''.join(slist)
def getquote(self):
    # Read the double-quoted string at self.pos via getdelimited().
    # The string ends at a closing quote or a CR; "(" is not treated
    # as the start of a comment inside it.
    closers = '"\r'
    return self.getdelimited('"', closers, allowcomments=0)
def getcomment(self):
    # Read the parenthesised comment at self.pos via getdelimited().
    # The comment ends at ")" or a CR; nested comments are allowed.
    closers = ')\r'
    return self.getdelimited('(', closers, allowcomments=1)
def getdomainliteral(self):
    # Read the bracketed domain literal at self.pos via getdelimited().
    # The literal ends at "]" or a CR; "(" is not treated as the start
    # of a comment inside it.
    closers = ']\r'
    return self.getdelimited('[', closers, allowcomments=0)
def getatom(self):
    # Return the atom starting at self.pos: the run of characters up to
    # (not including) the first one found in self.atomends, or up to the
    # end of the field.  self.pos is left on the terminating character.
    # Returns '' when the current character already ends an atom.
    start = self.pos
    while (self.pos < len(self.field)
           and self.field[self.pos] not in self.atomends):
        self.pos = self.pos + 1
    # Slicing yields the same text as joining the characters one by one,
    # without needing string.join(), which newer Pythons no longer have.
    return self.field[start:self.pos]
def getphraselist(self):
    # Collect the words of a phrase starting at self.pos and return
    # them as a list.  Quoted strings and atoms become words, linear
    # whitespace is skipped, and comments are moved to
    # self.commentlist.  Scanning stops, without consuming, at the
    # first other atom-ending character.
    words = []
    text = self.field
    while self.pos < len(text):
        ch = text[self.pos]
        if ch in self.LWS:
            self.pos = self.pos + 1
            continue
        if ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.atomends:
            break
        else:
            words.append(self.getatom())
    return words
# Parse a date field # Parse a date field
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment