Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
7e07b384
Kaydet (Commit)
7e07b384
authored
Nis 03, 1998
tarafından
Guido van Rossum
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Sjoerd's latest.
üst
0454b512
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
195 additions
and
117 deletions
+195
-117
xmllib.py
Lib/xmllib.py
+195
-117
No files found.
Lib/xmllib.py
Dosyayı görüntüle @
7e07b384
...
...
@@ -5,34 +5,50 @@ import re
import
string
version
=
'0.1'
# Regular expressions used for parsing
_S
=
'[
\t\r\n
]+'
_opS
=
'[
\t\r\n
]*'
_Name
=
'[a-zA-Z_:][-a-zA-Z0-9._:]*'
interesting
=
re
.
compile
(
'[&<]'
)
incomplete
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]*|#x[0-9a-fA-F]*)?|'
'<([a-zA-Z_:][^<>]*|'
'/([a-zA-Z_:][^<>]*)?|'
'![^<>]*|'
r'\?[^<>]*)?'
)
ref
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]+|#x[0-9a-fA-F]+);?'
)
illegal
=
re
.
compile
(
'[^
\t\r\n
-
\176\240
-
\377
]'
)
# illegal chars in content
interesting
=
re
.
compile
(
'[]&<]'
)
amp
=
re
.
compile
(
'&'
)
ref
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]+|#x[0-9a-fA-F]+)[^-a-zA-Z0-9._:]'
)
entityref
=
re
.
compile
(
'&(?P<name>'
+
_Name
+
')[^-a-zA-Z0-9._:]'
)
charref
=
re
.
compile
(
'&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])'
)
space
=
re
.
compile
(
_S
)
space
=
re
.
compile
(
_S
+
'$'
)
newline
=
re
.
compile
(
'
\n
'
)
starttagopen
=
re
.
compile
(
'<'
+
_Name
)
endtagopen
=
re
.
compile
(
'</'
)
starttagend
=
re
.
compile
(
_opS
+
'(?P<slash>/?)>'
)
endbracket
=
re
.
compile
(
'>'
)
endbracket
=
re
.
compile
(
_opS
+
'>'
)
tagfind
=
re
.
compile
(
_Name
)
cdataopen
=
re
.
compile
(
r'<!\[CDATA\['
)
cdataclose
=
re
.
compile
(
r'\]\]>'
)
doctype
=
re
.
compile
(
'<!DOCTYPE'
+
_S
+
'(?P<name>'
+
_Name
+
')'
+
_S
)
special
=
re
.
compile
(
'<!(?P<special>[^<>]*)>'
)
procopen
=
re
.
compile
(
r'<\?(?P<proc>'
+
_Name
+
')'
+
_S
)
# this matches one of the following:
# SYSTEM SystemLiteral
# PUBLIC PubidLiteral SystemLiteral
_SystemLiteral
=
'(?P<
%
s>
\'
[^
\'
]*
\'
|"[^"]*")'
_PublicLiteral
=
'(?P<
%
s>"[-
\'
()+,./:=?;!*#@$_
%%
\n\r
a-zA-Z0-9]*"|'
\
"'[-()+,./:=?;!*#@$_
%%
\n\r
a-zA-Z0-9]*')"
_ExternalId
=
'(?:SYSTEM|'
\
'PUBLIC'
+
_S
+
_PublicLiteral
%
'pubid'
+
\
')'
+
_S
+
_SystemLiteral
%
'syslit'
doctype
=
re
.
compile
(
'<!DOCTYPE'
+
_S
+
'(?P<name>'
+
_Name
+
')'
'(?:'
+
_S
+
_ExternalId
+
')?'
+
_opS
)
xmldecl
=
re
.
compile
(
'<
\
?xml'
+
_S
+
'version'
+
_opS
+
'='
+
_opS
+
'(?P<version>
\'
[^
\'
]*
\'
|"[^"]*")'
+
'(?:'
+
_S
+
'encoding'
+
_opS
+
'='
+
_opS
+
"(?P<encoding>'[A-Za-z][-A-Za-z0-9._]*'|"
'"[A-Za-z][-A-Za-z0-9._]*"))?'
'(?:'
+
_S
+
'standalone'
+
_opS
+
'='
+
_opS
+
'(?P<standalone>
\'
(?:yes|no)
\'
|"(?:yes|no)"))?'
+
_opS
+
'
\
?>'
)
procopen
=
re
.
compile
(
r'<\?(?P<proc>'
+
_Name
+
')'
+
_opS
)
procclose
=
re
.
compile
(
_opS
+
r'\?>'
)
commentopen
=
re
.
compile
(
'<!--'
)
commentclose
=
re
.
compile
(
'-->'
)
...
...
@@ -41,6 +57,7 @@ attrfind = re.compile(
_S
+
'(?P<name>'
+
_Name
+
')'
'('
+
_opS
+
'='
+
_opS
+
'(?P<value>
\'
[^
\'
]*
\'
|"[^"]*"|[-a-zA-Z0-9.:+*
%
?!()_#=~]+))'
)
attrtrans
=
string
.
maketrans
(
'
\r\n\t
'
,
' '
)
# XML parser base class -- find tags and call handler functions.
...
...
@@ -92,30 +109,43 @@ class XMLParser:
self
.
goahead
(
1
)
# Interface -- translate references
def
translate_references
(
self
,
data
):
newdata
=
[]
def
translate_references
(
self
,
data
,
all
=
1
):
i
=
0
while
1
:
res
=
ref
.
search
(
data
,
i
)
res
=
amp
.
search
(
data
,
i
)
if
res
is
None
:
newdata
.
append
(
data
[
i
:])
return
string
.
join
(
newdata
,
''
)
if
data
[
res
.
end
(
0
)
-
1
]
!=
';'
:
return
data
res
=
ref
.
match
(
data
,
res
.
start
(
0
))
if
res
is
None
:
self
.
syntax_error
(
"bogus `&'"
)
i
=
i
+
1
continue
i
=
res
.
end
(
0
)
if
data
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing after entity/char reference"
)
newdata
.
append
(
data
[
i
:
res
.
start
(
0
)])
i
=
i
-
1
str
=
res
.
group
(
1
)
pre
=
data
[:
res
.
start
(
0
)]
post
=
data
[
i
:]
if
str
[
0
]
==
'#'
:
if
str
[
1
]
==
'x'
:
newdata
.
append
(
chr
(
string
.
atoi
(
str
[
2
:],
16
)
))
str
=
chr
(
string
.
atoi
(
str
[
2
:],
16
))
else
:
newdata
.
append
(
chr
(
string
.
atoi
(
str
[
1
:])))
else
:
try
:
newdata
.
append
(
self
.
entitydefs
[
str
])
except
KeyError
:
str
=
chr
(
string
.
atoi
(
str
[
1
:]))
data
=
pre
+
str
+
post
i
=
res
.
start
(
0
)
+
len
(
str
)
elif
all
:
if
self
.
entitydefs
.
has_key
(
str
):
data
=
pre
+
self
.
entitydefs
[
str
]
+
post
i
=
res
.
start
(
0
)
# rescan substituted text
else
:
self
.
syntax_error
(
'reference to unknown entity'
)
# can't do it, so keep the entity ref in
newdata
.
append
(
'&'
+
str
+
';'
)
i
=
res
.
end
(
0
)
data
=
pre
+
'&'
+
str
+
';'
+
post
i
=
res
.
start
(
0
)
+
len
(
str
)
+
2
else
:
# just translating character references
pass
# i is already postioned correctly
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
...
...
@@ -139,8 +169,14 @@ class XMLParser:
else
:
j
=
n
if
i
<
j
:
if
self
.
__at_start
:
self
.
syntax_error
(
'illegal data at start of file'
)
self
.
__at_start
=
0
data
=
rawdata
[
i
:
j
]
if
not
self
.
stack
and
not
space
.
match
(
data
):
self
.
syntax_error
(
'data not in content'
)
if
illegal
.
search
(
data
):
self
.
syntax_error
(
'illegal character in content'
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
j
...
...
@@ -184,6 +220,20 @@ class XMLParser:
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
i
],
'
\n
'
)
i
=
k
continue
res
=
xmldecl
.
match
(
rawdata
,
i
)
if
res
:
if
not
self
.
__at_start
:
self
.
syntax_error
(
"<?xml?> declaration not at start of document"
)
version
,
encoding
,
standalone
=
res
.
group
(
'version'
,
'encoding'
,
'standalone'
)
if
version
[
1
:
-
1
]
!=
'1.0'
:
raise
RuntimeError
,
'only XML version 1.0 supported'
if
encoding
:
encoding
=
encoding
[
1
:
-
1
]
if
standalone
:
standalone
=
standalone
[
1
:
-
1
]
self
.
handle_xml
(
encoding
,
standalone
)
i
=
res
.
end
(
0
)
continue
res
=
procopen
.
match
(
rawdata
,
i
)
if
res
:
k
=
self
.
parse_proc
(
i
)
...
...
@@ -209,18 +259,6 @@ class XMLParser:
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
k
],
'
\n
'
)
i
=
k
continue
res
=
special
.
match
(
rawdata
,
i
)
if
res
:
if
self
.
literal
:
data
=
rawdata
[
i
]
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
i
+
1
continue
self
.
handle_special
(
res
.
group
(
'special'
))
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
i
=
res
.
end
(
0
)
continue
elif
rawdata
[
i
]
==
'&'
:
res
=
charref
.
match
(
rawdata
,
i
)
if
res
is
not
None
:
...
...
@@ -228,6 +266,8 @@ class XMLParser:
if
rawdata
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing in charref"
)
i
=
i
-
1
if
not
self
.
stack
:
self
.
syntax_error
(
'data not in content'
)
self
.
handle_charref
(
res
.
group
(
'char'
)[:
-
1
])
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
continue
...
...
@@ -237,36 +277,45 @@ class XMLParser:
if
rawdata
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing in entityref"
)
i
=
i
-
1
self
.
handle_entityref
(
res
.
group
(
'name'
))
name
=
res
.
group
(
'name'
)
if
self
.
entitydefs
.
has_key
(
name
):
self
.
rawdata
=
rawdata
=
rawdata
[:
res
.
start
(
0
)]
+
self
.
entitydefs
[
name
]
+
rawdata
[
i
:]
n
=
len
(
rawdata
)
i
=
res
.
start
(
0
)
else
:
self
.
syntax_error
(
'reference to unknown entity'
)
self
.
unknown_entityref
(
name
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
continue
elif
rawdata
[
i
]
==
']'
:
if
n
-
i
<
3
:
break
if
cdataclose
.
match
(
rawdata
,
i
):
self
.
syntax_error
(
"bogus `]]>'"
)
self
.
handle_data
(
rawdata
[
i
])
i
=
i
+
1
continue
else
:
raise
RuntimeError
,
'neither < nor & ??'
# We get here only if incomplete matches but
# nothing else
res
=
incomplete
.
match
(
rawdata
,
i
)
if
not
res
:
data
=
rawdata
[
i
]
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
i
+
1
continue
j
=
res
.
end
(
0
)
if
j
==
n
:
break
# Really incomplete
self
.
syntax_error
(
"bogus `<' or `&'"
)
data
=
res
.
group
(
0
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
j
break
# end while
if
i
>
0
:
self
.
__at_start
=
0
if
end
and
i
<
n
:
data
=
rawdata
[
i
:
n
]
data
=
rawdata
[
i
]
self
.
syntax_error
(
"bogus `
%
s'"
%
data
)
if
illegal
.
search
(
data
):
self
.
syntax_error
(
'illegal character in content'
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
n
self
.
rawdata
=
rawdata
[
i
+
1
:]
return
self
.
goahead
(
end
)
self
.
rawdata
=
rawdata
[
i
:]
if
end
:
if
not
self
.
__seen_starttag
:
self
.
syntax_error
(
'no elements in file'
)
if
self
.
stack
:
self
.
syntax_error
(
'missing end tags'
)
while
self
.
stack
:
...
...
@@ -280,9 +329,12 @@ class XMLParser:
res
=
commentclose
.
search
(
rawdata
,
i
+
4
)
if
not
res
:
return
-
1
# doubledash search will succeed because it's a subset of commentclose
if
doubledash
.
search
(
rawdata
,
i
+
4
)
.
start
(
0
)
<
res
.
start
(
0
):
if
doubledash
.
search
(
rawdata
,
i
+
4
,
res
.
start
(
0
)):
self
.
syntax_error
(
"`--' inside comment"
)
if
rawdata
[
res
.
start
(
0
)
-
1
]
==
'-'
:
self
.
syntax_error
(
'comment cannot end in three dashes'
)
if
illegal
.
search
(
rawdata
,
i
+
4
,
res
.
start
(
0
)):
self
.
syntax_error
(
'illegal character in comment'
)
self
.
handle_comment
(
rawdata
[
i
+
4
:
res
.
start
(
0
)])
return
res
.
end
(
0
)
...
...
@@ -291,28 +343,59 @@ class XMLParser:
rawdata
=
self
.
rawdata
n
=
len
(
rawdata
)
name
=
res
.
group
(
'name'
)
pubid
,
syslit
=
res
.
group
(
'pubid'
,
'syslit'
)
if
pubid
is
not
None
:
pubid
=
pubid
[
1
:
-
1
]
# remove quotes
pubid
=
string
.
join
(
string
.
split
(
pubid
))
# normalize
if
syslit
is
not
None
:
syslit
=
syslit
[
1
:
-
1
]
# remove quotes
j
=
k
=
res
.
end
(
0
)
level
=
0
while
k
<
n
:
c
=
rawdata
[
k
]
if
c
==
'<'
:
level
=
level
+
1
elif
c
==
'>'
:
if
level
==
0
:
self
.
handle_doctype
(
name
,
rawdata
[
j
:
k
])
return
k
+
1
level
=
level
-
1
if
k
>=
n
:
return
-
1
if
rawdata
[
k
]
==
'['
:
level
=
0
k
=
k
+
1
return
-
1
dq
=
sq
=
0
while
k
<
n
:
c
=
rawdata
[
k
]
if
not
sq
and
c
==
'"'
:
dq
=
not
dq
elif
not
dq
and
c
==
"'"
:
sq
=
not
sq
elif
sq
or
dq
:
pass
elif
level
<=
0
and
c
==
']'
:
res
=
endbracket
.
match
(
rawdata
,
k
+
1
)
if
not
res
:
return
-
1
self
.
handle_doctype
(
name
,
pubid
,
syslit
,
rawdata
[
j
+
1
:
k
])
return
res
.
end
(
0
)
elif
c
==
'<'
:
level
=
level
+
1
elif
c
==
'>'
:
level
=
level
-
1
if
level
<
0
:
self
.
syntax_error
(
"bogus `>' in DOCTYPE"
)
k
=
k
+
1
res
=
endbracket
.
search
(
rawdata
,
k
)
if
not
res
:
return
-
1
if
res
.
start
(
0
)
!=
k
:
self
.
syntax_error
(
'garbage in DOCTYPE'
)
self
.
handle_doctype
(
name
,
pubid
,
syslit
,
None
)
return
res
.
end
(
0
)
# Internal -- handle CDATA tag, return length or -1 if not terminated
def
parse_cdata
(
self
,
i
):
rawdata
=
self
.
rawdata
if
rawdata
[
i
:
i
+
9
]
<>
'<![CDATA['
:
raise
RuntimeError
,
'unexpected call to
handl
e_cdata'
raise
RuntimeError
,
'unexpected call to
pars
e_cdata'
res
=
cdataclose
.
search
(
rawdata
,
i
+
9
)
if
not
res
:
return
-
1
if
illegal
.
search
(
rawdata
,
i
+
9
,
res
.
start
(
0
)):
self
.
syntax_error
(
'illegal character in CDATA'
)
if
not
self
.
stack
:
self
.
syntax_error
(
'CDATA not in content'
)
self
.
handle_cdata
(
rawdata
[
i
+
9
:
res
.
start
(
0
)])
return
res
.
end
(
0
)
...
...
@@ -324,24 +407,15 @@ class XMLParser:
if
not
end
:
return
-
1
j
=
end
.
start
(
0
)
if
illegal
.
search
(
rawdata
,
i
+
2
,
j
):
self
.
syntax_error
(
'illegal character in processing instruction'
)
res
=
tagfind
.
match
(
rawdata
,
i
+
2
)
if
not
res
:
raise
RuntimeError
,
'unexpected call to parse_proc'
k
=
res
.
end
(
0
)
name
=
res
.
group
(
0
)
if
name
==
'xml'
:
if
self
.
__at_start
:
attrdict
,
k
=
self
.
parse_attributes
(
'xml'
,
k
,
j
,
self
.
__xml_attributes
)
if
k
!=
j
:
self
.
syntax_error
(
'garbage at end of <?xml?>'
)
if
attrdict
[
'version'
]
!=
'1.0'
:
self
.
syntax_error
(
'only XML version 1.0 supported'
)
self
.
handle_xml
(
attrdict
.
get
(
'encoding'
,
None
),
attrdict
[
'standalone'
])
return
end
.
end
(
0
)
else
:
self
.
syntax_error
(
"<?xml?> tag not at start of document"
)
if
string
.
find
(
string
.
lower
(
name
),
'xml'
)
>=
0
:
self
.
syntax_error
(
'illegal processing instruction target name'
)
self
.
handle_proc
(
name
,
rawdata
[
k
:
j
])
return
end
.
end
(
0
)
...
...
@@ -375,6 +449,7 @@ class XMLParser:
(
attrname
,
tag
))
if
attrdict
.
has_key
(
attrname
):
self
.
syntax_error
(
'attribute specified twice'
)
attrvalue
=
string
.
translate
(
attrvalue
,
attrtrans
)
attrdict
[
attrname
]
=
self
.
translate_references
(
attrvalue
)
k
=
res
.
end
(
0
)
if
attributes
is
not
None
:
...
...
@@ -400,6 +475,8 @@ class XMLParser:
if
not
self
.
__seen_starttag
and
self
.
__seen_doctype
:
if
tag
!=
self
.
__seen_doctype
:
self
.
syntax_error
(
'starttag does not match DOCTYPE'
)
if
self
.
__seen_starttag
and
not
self
.
stack
:
self
.
syntax_error
(
'multiple elements on top level'
)
if
hasattr
(
self
,
tag
+
'_attributes'
):
attributes
=
getattr
(
self
,
tag
+
'_attributes'
)
else
:
...
...
@@ -428,10 +505,7 @@ class XMLParser:
tag
=
res
.
group
(
0
)
k
=
res
.
end
(
0
)
if
k
!=
end
.
start
(
0
):
# check that there is only white space at end of tag
res
=
space
.
match
(
rawdata
,
k
)
if
res
is
None
or
res
.
end
(
0
)
!=
end
.
start
(
0
):
self
.
syntax_error
(
'garbage in end tag'
)
self
.
syntax_error
(
'garbage in end tag'
)
self
.
finish_endtag
(
tag
)
return
end
.
end
(
0
)
...
...
@@ -439,17 +513,18 @@ class XMLParser:
# Return -1 for unknown tag, 1 for balanced tag
def
finish_starttag
(
self
,
tag
,
attrs
):
self
.
stack
.
append
(
tag
)
try
:
method
=
getattr
(
self
,
'start_'
+
tag
)
except
AttributeError
:
self
.
unknown_starttag
(
tag
,
attrs
)
return
-
1
else
:
methodname
=
'start_'
+
tag
if
hasattr
(
self
,
methodname
):
method
=
getattr
(
self
,
methodname
)
self
.
handle_starttag
(
tag
,
method
,
attrs
)
return
1
else
:
self
.
unknown_starttag
(
tag
,
attrs
)
return
-
1
# Internal -- finish processing of end tag
def
finish_endtag
(
self
,
tag
):
methodname
=
'end_'
+
tag
if
not
tag
:
self
.
syntax_error
(
'name-less end tag'
)
found
=
len
(
self
.
stack
)
-
1
...
...
@@ -459,9 +534,10 @@ class XMLParser:
else
:
if
tag
not
in
self
.
stack
:
self
.
syntax_error
(
'unopened end tag'
)
try
:
method
=
getattr
(
self
,
'end_'
+
tag
)
except
AttributeError
:
if
hasattr
(
self
,
methodname
):
method
=
getattr
(
self
,
methodname
)
self
.
handle_endtag
(
tag
,
method
)
else
:
self
.
unknown_endtag
(
tag
)
return
found
=
len
(
self
.
stack
)
...
...
@@ -472,11 +548,8 @@ class XMLParser:
if
found
<
len
(
self
.
stack
)
-
1
:
self
.
syntax_error
(
'missing close tag for
%
s'
%
self
.
stack
[
-
1
])
tag
=
self
.
stack
[
-
1
]
try
:
method
=
getattr
(
self
,
'end_'
+
tag
)
except
AttributeError
:
method
=
None
if
method
:
if
hasattr
(
self
,
methodname
):
method
=
getattr
(
self
,
methodname
)
self
.
handle_endtag
(
tag
,
method
)
else
:
self
.
unknown_endtag
(
tag
)
...
...
@@ -487,7 +560,7 @@ class XMLParser:
pass
# Overridable -- handle DOCTYPE
def
handle_doctype
(
self
,
tag
,
data
):
def
handle_doctype
(
self
,
tag
,
pubid
,
syslit
,
data
):
pass
# Overridable -- handle start tag
...
...
@@ -514,7 +587,12 @@ class XMLParser:
self
.
handle_data
(
chr
(
n
))
# Definition of entities -- derived classes may override
entitydefs
=
{
'lt'
:
'<'
,
'gt'
:
'>'
,
'amp'
:
'&'
,
'quot'
:
'"'
,
'apos'
:
"'"
}
entitydefs
=
{
'lt'
:
'<'
,
# must use charref
'gt'
:
'>'
,
'amp'
:
'&'
,
# must use charref
'quot'
:
'"'
,
'apos'
:
'''
,
}
# Example -- handle entity reference, no need to override
def
handle_entityref
(
self
,
name
):
...
...
@@ -541,10 +619,6 @@ class XMLParser:
def
handle_proc
(
self
,
name
,
data
):
pass
# Example -- handle special instructions, could be overridden
def
handle_special
(
self
,
data
):
pass
# Example -- handle relatively harmless syntax errors, could be overridden
def
syntax_error
(
self
,
message
):
raise
RuntimeError
,
'Syntax error at line
%
d:
%
s'
%
(
self
.
lineno
,
message
)
...
...
@@ -566,10 +640,14 @@ class TestXMLParser(XMLParser):
self
.
flush
()
print
'xml: encoding ='
,
encoding
,
'standalone ='
,
standalone
def
handle_doctype
(
self
,
tag
,
data
):
def
handle_doctype
(
self
,
tag
,
pubid
,
syslit
,
data
):
self
.
flush
()
print
'DOCTYPE:'
,
tag
,
`data`
def
handle_entity
(
self
,
name
,
strval
,
pubid
,
syslit
,
ndata
):
self
.
flush
()
print
'ENTITY:'
,
`data`
def
handle_data
(
self
,
data
):
self
.
testdata
=
self
.
testdata
+
data
if
len
(
`self.testdata`
)
>=
70
:
...
...
@@ -589,10 +667,6 @@ class TestXMLParser(XMLParser):
self
.
flush
()
print
'processing:'
,
name
,
`data`
def
handle_special
(
self
,
data
):
self
.
flush
()
print
'special:'
,
`data`
def
handle_comment
(
self
,
data
):
self
.
flush
()
r
=
`data`
...
...
@@ -660,9 +734,13 @@ def test(args = None):
f
.
close
()
x
=
klass
()
for
c
in
data
:
x
.
feed
(
c
)
x
.
close
()
try
:
for
c
in
data
:
x
.
feed
(
c
)
x
.
close
()
except
RuntimeError
,
msg
:
print
msg
sys
.
exit
(
1
)
if
__name__
==
'__main__'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment