Commit 39d34512, authored by Guido van Rossum

parse_declaration(): be more lenient in what we accept. We now
basically accept <!...> where the dots can be single- or double-quoted
strings or any other character except >.

Background: I found a real-life example that failed to parse with
the old assumption: http://www.opensource.org/licenses/jabberpl.html
contains a few constructs of the form <![if !supportLists]>...<![endif]>.
parent 2b63969a
...@@ -39,7 +39,7 @@ attrfind = re.compile( ...@@ -39,7 +39,7 @@ attrfind = re.compile(
r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*' r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?') r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*') decldata = re.compile(r'[^>\'\"]+')
declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*') declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
...@@ -212,8 +212,8 @@ class SGMLParser: ...@@ -212,8 +212,8 @@ class SGMLParser:
def parse_declaration(self, i): def parse_declaration(self, i):
rawdata = self.rawdata rawdata = self.rawdata
j = i + 2 j = i + 2
# in practice, this should look like: ((name|stringlit) S*)+ '>' n = len(rawdata)
while 1: while j < n:
c = rawdata[j:j+1] c = rawdata[j:j+1]
if c == ">": if c == ">":
# end of declaration syntax # end of declaration syntax
...@@ -225,19 +225,14 @@ class SGMLParser: ...@@ -225,19 +225,14 @@ class SGMLParser:
# incomplete or an error? # incomplete or an error?
return -1 return -1
j = m.end() j = m.end()
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": else:
m = declname.match(rawdata, j) m = decldata.match(rawdata, j)
if not m: if not m:
# incomplete or an error? # incomplete or an error?
return -1 return -1
j = m.end() j = m.end()
elif i == len(rawdata): # end of buffer between tokens
# end of buffer between tokens return -1
return -1
else:
raise SGMLParseError(
"unexpected char in declaration: %s" % `rawdata[i]`)
assert 0, "can't get here!"
# Internal -- parse processing instr, return length or -1 if not terminated # Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i): def parse_pi(self, i):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment