Commit 145b2e01, authored by Guido van Rossum

changed comment parsing

parent 667d7049
...@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]') ...@@ -21,7 +21,9 @@ entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
charref = regex.compile('&#[a-zA-Z0-9]+;') charref = regex.compile('&#[a-zA-Z0-9]+;')
starttagopen = regex.compile('<[a-zA-Z]') starttagopen = regex.compile('<[a-zA-Z]')
endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>') endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
special = regex.compile('<![^<>]*>')
commentopen = regex.compile('<!--') commentopen = regex.compile('<!--')
commentclose = regex.compile('--[ \t\n]*>')
# SGML parser base class -- find tags and call handler functions. # SGML parser base class -- find tags and call handler functions.
...@@ -111,6 +113,14 @@ class SGMLParser: ...@@ -111,6 +113,14 @@ class SGMLParser:
if k < 0: break if k < 0: break
i = i+k i = i+k
continue continue
k = special.match(rawdata, i)
if k >= 0:
if self.literal:
self.handle_data(rawdata[i])
i = i+1
continue
i = i+k
continue
elif rawdata[i] == '&': elif rawdata[i] == '&':
k = charref.match(rawdata, i) k = charref.match(rawdata, i)
if k >= 0: if k >= 0:
...@@ -141,25 +151,16 @@ class SGMLParser: ...@@ -141,25 +151,16 @@ class SGMLParser:
self.rawdata = rawdata[i:] self.rawdata = rawdata[i:]
# XXX if end: check for empty stack # XXX if end: check for empty stack
# Internal -- parse comment, return length or -1 if not ternimated # Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i): def parse_comment(self, i):
rawdata = self.rawdata rawdata = self.rawdata
if rawdata[i:i+4] <> '<!--': if rawdata[i:i+4] <> '<!--':
raise RuntimeError, 'unexpected call to handle_comment' raise RuntimeError, 'unexpected call to handle_comment'
try: j = commentclose.search(rawdata, i+4)
j = string.index(rawdata, '--', i+4) if j < 0:
except string.index_error:
return -1 return -1
self.handle_comment(rawdata[i+4: j]) self.handle_comment(rawdata[i+4: j])
j = j+2 j = j+commentclose.match(rawdata, j)
n = len(rawdata)
while j < n and rawdata[j] in ' \t\n': j = j+1
if j == n: return -1 # Wait for final '>'
if rawdata[j] == '>':
j = j+1
else:
print '*** comment not terminated with >'
print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
return j-i return j-i
# Internal -- handle starttag, return length or -1 if not terminated # Internal -- handle starttag, return length or -1 if not terminated
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment