Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
7e07b384
Kaydet (Commit)
7e07b384
authored
Nis 03, 1998
tarafından
Guido van Rossum
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Sjoerd's latest.
üst
0454b512
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
183 additions
and
105 deletions
+183
-105
xmllib.py
Lib/xmllib.py
+183
-105
No files found.
Lib/xmllib.py
Dosyayı görüntüle @
7e07b384
...
@@ -5,34 +5,50 @@ import re
...
@@ -5,34 +5,50 @@ import re
import
string
import
string
version
=
'0.1'
# Regular expressions used for parsing
# Regular expressions used for parsing
_S
=
'[
\t\r\n
]+'
_S
=
'[
\t\r\n
]+'
_opS
=
'[
\t\r\n
]*'
_opS
=
'[
\t\r\n
]*'
_Name
=
'[a-zA-Z_:][-a-zA-Z0-9._:]*'
_Name
=
'[a-zA-Z_:][-a-zA-Z0-9._:]*'
interesting
=
re
.
compile
(
'[&<]'
)
illegal
=
re
.
compile
(
'[^
\t\r\n
-
\176\240
-
\377
]'
)
# illegal chars in content
incomplete
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]*|#x[0-9a-fA-F]*)?|'
interesting
=
re
.
compile
(
'[]&<]'
)
'<([a-zA-Z_:][^<>]*|'
'/([a-zA-Z_:][^<>]*)?|'
amp
=
re
.
compile
(
'&'
)
'![^<>]*|'
ref
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]+|#x[0-9a-fA-F]+)[^-a-zA-Z0-9._:]'
)
r'\?[^<>]*)?'
)
ref
=
re
.
compile
(
'&('
+
_Name
+
'|#[0-9]+|#x[0-9a-fA-F]+);?'
)
entityref
=
re
.
compile
(
'&(?P<name>'
+
_Name
+
')[^-a-zA-Z0-9._:]'
)
entityref
=
re
.
compile
(
'&(?P<name>'
+
_Name
+
')[^-a-zA-Z0-9._:]'
)
charref
=
re
.
compile
(
'&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])'
)
charref
=
re
.
compile
(
'&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])'
)
space
=
re
.
compile
(
_S
)
space
=
re
.
compile
(
_S
+
'$'
)
newline
=
re
.
compile
(
'
\n
'
)
newline
=
re
.
compile
(
'
\n
'
)
starttagopen
=
re
.
compile
(
'<'
+
_Name
)
starttagopen
=
re
.
compile
(
'<'
+
_Name
)
endtagopen
=
re
.
compile
(
'</'
)
endtagopen
=
re
.
compile
(
'</'
)
starttagend
=
re
.
compile
(
_opS
+
'(?P<slash>/?)>'
)
starttagend
=
re
.
compile
(
_opS
+
'(?P<slash>/?)>'
)
endbracket
=
re
.
compile
(
'>'
)
endbracket
=
re
.
compile
(
_opS
+
'>'
)
tagfind
=
re
.
compile
(
_Name
)
tagfind
=
re
.
compile
(
_Name
)
cdataopen
=
re
.
compile
(
r'<!\[CDATA\['
)
cdataopen
=
re
.
compile
(
r'<!\[CDATA\['
)
cdataclose
=
re
.
compile
(
r'\]\]>'
)
cdataclose
=
re
.
compile
(
r'\]\]>'
)
doctype
=
re
.
compile
(
'<!DOCTYPE'
+
_S
+
'(?P<name>'
+
_Name
+
')'
+
_S
)
# this matches one of the following:
special
=
re
.
compile
(
'<!(?P<special>[^<>]*)>'
)
# SYSTEM SystemLiteral
procopen
=
re
.
compile
(
r'<\?(?P<proc>'
+
_Name
+
')'
+
_S
)
# PUBLIC PubidLiteral SystemLiteral
_SystemLiteral
=
'(?P<
%
s>
\'
[^
\'
]*
\'
|"[^"]*")'
_PublicLiteral
=
'(?P<
%
s>"[-
\'
()+,./:=?;!*#@$_
%%
\n\r
a-zA-Z0-9]*"|'
\
"'[-()+,./:=?;!*#@$_
%%
\n\r
a-zA-Z0-9]*')"
_ExternalId
=
'(?:SYSTEM|'
\
'PUBLIC'
+
_S
+
_PublicLiteral
%
'pubid'
+
\
')'
+
_S
+
_SystemLiteral
%
'syslit'
doctype
=
re
.
compile
(
'<!DOCTYPE'
+
_S
+
'(?P<name>'
+
_Name
+
')'
'(?:'
+
_S
+
_ExternalId
+
')?'
+
_opS
)
xmldecl
=
re
.
compile
(
'<
\
?xml'
+
_S
+
'version'
+
_opS
+
'='
+
_opS
+
'(?P<version>
\'
[^
\'
]*
\'
|"[^"]*")'
+
'(?:'
+
_S
+
'encoding'
+
_opS
+
'='
+
_opS
+
"(?P<encoding>'[A-Za-z][-A-Za-z0-9._]*'|"
'"[A-Za-z][-A-Za-z0-9._]*"))?'
'(?:'
+
_S
+
'standalone'
+
_opS
+
'='
+
_opS
+
'(?P<standalone>
\'
(?:yes|no)
\'
|"(?:yes|no)"))?'
+
_opS
+
'
\
?>'
)
procopen
=
re
.
compile
(
r'<\?(?P<proc>'
+
_Name
+
')'
+
_opS
)
procclose
=
re
.
compile
(
_opS
+
r'\?>'
)
procclose
=
re
.
compile
(
_opS
+
r'\?>'
)
commentopen
=
re
.
compile
(
'<!--'
)
commentopen
=
re
.
compile
(
'<!--'
)
commentclose
=
re
.
compile
(
'-->'
)
commentclose
=
re
.
compile
(
'-->'
)
...
@@ -41,6 +57,7 @@ attrfind = re.compile(
...
@@ -41,6 +57,7 @@ attrfind = re.compile(
_S
+
'(?P<name>'
+
_Name
+
')'
_S
+
'(?P<name>'
+
_Name
+
')'
'('
+
_opS
+
'='
+
_opS
+
'('
+
_opS
+
'='
+
_opS
+
'(?P<value>
\'
[^
\'
]*
\'
|"[^"]*"|[-a-zA-Z0-9.:+*
%
?!()_#=~]+))'
)
'(?P<value>
\'
[^
\'
]*
\'
|"[^"]*"|[-a-zA-Z0-9.:+*
%
?!()_#=~]+))'
)
attrtrans
=
string
.
maketrans
(
'
\r\n\t
'
,
' '
)
# XML parser base class -- find tags and call handler functions.
# XML parser base class -- find tags and call handler functions.
...
@@ -92,30 +109,43 @@ class XMLParser:
...
@@ -92,30 +109,43 @@ class XMLParser:
self
.
goahead
(
1
)
self
.
goahead
(
1
)
# Interface -- translate references
# Interface -- translate references
def
translate_references
(
self
,
data
):
def
translate_references
(
self
,
data
,
all
=
1
):
newdata
=
[]
i
=
0
i
=
0
while
1
:
while
1
:
res
=
ref
.
search
(
data
,
i
)
res
=
amp
.
search
(
data
,
i
)
if
res
is
None
:
return
data
res
=
ref
.
match
(
data
,
res
.
start
(
0
))
if
res
is
None
:
if
res
is
None
:
newdata
.
append
(
data
[
i
:])
self
.
syntax_error
(
"bogus `&'"
)
return
string
.
join
(
newdata
,
''
)
i
=
i
+
1
if
data
[
res
.
end
(
0
)
-
1
]
!=
';'
:
continue
i
=
res
.
end
(
0
)
if
data
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing after entity/char reference"
)
self
.
syntax_error
(
"`;' missing after entity/char reference"
)
newdata
.
append
(
data
[
i
:
res
.
start
(
0
)])
i
=
i
-
1
str
=
res
.
group
(
1
)
str
=
res
.
group
(
1
)
pre
=
data
[:
res
.
start
(
0
)]
post
=
data
[
i
:]
if
str
[
0
]
==
'#'
:
if
str
[
0
]
==
'#'
:
if
str
[
1
]
==
'x'
:
if
str
[
1
]
==
'x'
:
newdata
.
append
(
chr
(
string
.
atoi
(
str
[
2
:],
16
)
))
str
=
chr
(
string
.
atoi
(
str
[
2
:],
16
))
else
:
else
:
newdata
.
append
(
chr
(
string
.
atoi
(
str
[
1
:])))
str
=
chr
(
string
.
atoi
(
str
[
1
:]))
data
=
pre
+
str
+
post
i
=
res
.
start
(
0
)
+
len
(
str
)
elif
all
:
if
self
.
entitydefs
.
has_key
(
str
):
data
=
pre
+
self
.
entitydefs
[
str
]
+
post
i
=
res
.
start
(
0
)
# rescan substituted text
else
:
else
:
try
:
self
.
syntax_error
(
'reference to unknown entity'
)
newdata
.
append
(
self
.
entitydefs
[
str
])
except
KeyError
:
# can't do it, so keep the entity ref in
# can't do it, so keep the entity ref in
newdata
.
append
(
'&'
+
str
+
';'
)
data
=
pre
+
'&'
+
str
+
';'
+
post
i
=
res
.
end
(
0
)
i
=
res
.
start
(
0
)
+
len
(
str
)
+
2
else
:
# just translating character references
pass
# i is already postioned correctly
# Internal -- handle data as far as reasonable. May leave state
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# and data to be processed by a subsequent call. If 'end' is
...
@@ -139,8 +169,14 @@ class XMLParser:
...
@@ -139,8 +169,14 @@ class XMLParser:
else
:
else
:
j
=
n
j
=
n
if
i
<
j
:
if
i
<
j
:
if
self
.
__at_start
:
self
.
syntax_error
(
'illegal data at start of file'
)
self
.
__at_start
=
0
self
.
__at_start
=
0
data
=
rawdata
[
i
:
j
]
data
=
rawdata
[
i
:
j
]
if
not
self
.
stack
and
not
space
.
match
(
data
):
self
.
syntax_error
(
'data not in content'
)
if
illegal
.
search
(
data
):
self
.
syntax_error
(
'illegal character in content'
)
self
.
handle_data
(
data
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
j
i
=
j
...
@@ -184,6 +220,20 @@ class XMLParser:
...
@@ -184,6 +220,20 @@ class XMLParser:
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
i
],
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
i
],
'
\n
'
)
i
=
k
i
=
k
continue
continue
res
=
xmldecl
.
match
(
rawdata
,
i
)
if
res
:
if
not
self
.
__at_start
:
self
.
syntax_error
(
"<?xml?> declaration not at start of document"
)
version
,
encoding
,
standalone
=
res
.
group
(
'version'
,
'encoding'
,
'standalone'
)
if
version
[
1
:
-
1
]
!=
'1.0'
:
raise
RuntimeError
,
'only XML version 1.0 supported'
if
encoding
:
encoding
=
encoding
[
1
:
-
1
]
if
standalone
:
standalone
=
standalone
[
1
:
-
1
]
self
.
handle_xml
(
encoding
,
standalone
)
i
=
res
.
end
(
0
)
continue
res
=
procopen
.
match
(
rawdata
,
i
)
res
=
procopen
.
match
(
rawdata
,
i
)
if
res
:
if
res
:
k
=
self
.
parse_proc
(
i
)
k
=
self
.
parse_proc
(
i
)
...
@@ -209,18 +259,6 @@ class XMLParser:
...
@@ -209,18 +259,6 @@ class XMLParser:
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
k
],
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
rawdata
[
i
:
k
],
'
\n
'
)
i
=
k
i
=
k
continue
continue
res
=
special
.
match
(
rawdata
,
i
)
if
res
:
if
self
.
literal
:
data
=
rawdata
[
i
]
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
i
+
1
continue
self
.
handle_special
(
res
.
group
(
'special'
))
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
i
=
res
.
end
(
0
)
continue
elif
rawdata
[
i
]
==
'&'
:
elif
rawdata
[
i
]
==
'&'
:
res
=
charref
.
match
(
rawdata
,
i
)
res
=
charref
.
match
(
rawdata
,
i
)
if
res
is
not
None
:
if
res
is
not
None
:
...
@@ -228,6 +266,8 @@ class XMLParser:
...
@@ -228,6 +266,8 @@ class XMLParser:
if
rawdata
[
i
-
1
]
!=
';'
:
if
rawdata
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing in charref"
)
self
.
syntax_error
(
"`;' missing in charref"
)
i
=
i
-
1
i
=
i
-
1
if
not
self
.
stack
:
self
.
syntax_error
(
'data not in content'
)
self
.
handle_charref
(
res
.
group
(
'char'
)[:
-
1
])
self
.
handle_charref
(
res
.
group
(
'char'
)[:
-
1
])
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
continue
continue
...
@@ -237,36 +277,45 @@ class XMLParser:
...
@@ -237,36 +277,45 @@ class XMLParser:
if
rawdata
[
i
-
1
]
!=
';'
:
if
rawdata
[
i
-
1
]
!=
';'
:
self
.
syntax_error
(
"`;' missing in entityref"
)
self
.
syntax_error
(
"`;' missing in entityref"
)
i
=
i
-
1
i
=
i
-
1
self
.
handle_entityref
(
res
.
group
(
'name'
))
name
=
res
.
group
(
'name'
)
if
self
.
entitydefs
.
has_key
(
name
):
self
.
rawdata
=
rawdata
=
rawdata
[:
res
.
start
(
0
)]
+
self
.
entitydefs
[
name
]
+
rawdata
[
i
:]
n
=
len
(
rawdata
)
i
=
res
.
start
(
0
)
else
:
self
.
syntax_error
(
'reference to unknown entity'
)
self
.
unknown_entityref
(
name
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
res
.
group
(
0
),
'
\n
'
)
continue
continue
elif
rawdata
[
i
]
==
']'
:
if
n
-
i
<
3
:
break
if
cdataclose
.
match
(
rawdata
,
i
):
self
.
syntax_error
(
"bogus `]]>'"
)
self
.
handle_data
(
rawdata
[
i
])
i
=
i
+
1
continue
else
:
else
:
raise
RuntimeError
,
'neither < nor & ??'
raise
RuntimeError
,
'neither < nor & ??'
# We get here only if incomplete matches but
# We get here only if incomplete matches but
# nothing else
# nothing else
res
=
incomplete
.
match
(
rawdata
,
i
)
break
if
not
res
:
data
=
rawdata
[
i
]
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
i
+
1
continue
j
=
res
.
end
(
0
)
if
j
==
n
:
break
# Really incomplete
self
.
syntax_error
(
"bogus `<' or `&'"
)
data
=
res
.
group
(
0
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
j
# end while
# end while
if
i
>
0
:
self
.
__at_start
=
0
if
end
and
i
<
n
:
if
end
and
i
<
n
:
data
=
rawdata
[
i
:
n
]
data
=
rawdata
[
i
]
self
.
syntax_error
(
"bogus `
%
s'"
%
data
)
if
illegal
.
search
(
data
):
self
.
syntax_error
(
'illegal character in content'
)
self
.
handle_data
(
data
)
self
.
handle_data
(
data
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
self
.
lineno
=
self
.
lineno
+
string
.
count
(
data
,
'
\n
'
)
i
=
n
self
.
rawdata
=
rawdata
[
i
+
1
:]
return
self
.
goahead
(
end
)
self
.
rawdata
=
rawdata
[
i
:]
self
.
rawdata
=
rawdata
[
i
:]
if
end
:
if
end
:
if
not
self
.
__seen_starttag
:
self
.
syntax_error
(
'no elements in file'
)
if
self
.
stack
:
if
self
.
stack
:
self
.
syntax_error
(
'missing end tags'
)
self
.
syntax_error
(
'missing end tags'
)
while
self
.
stack
:
while
self
.
stack
:
...
@@ -280,9 +329,12 @@ class XMLParser:
...
@@ -280,9 +329,12 @@ class XMLParser:
res
=
commentclose
.
search
(
rawdata
,
i
+
4
)
res
=
commentclose
.
search
(
rawdata
,
i
+
4
)
if
not
res
:
if
not
res
:
return
-
1
return
-
1
# doubledash search will succeed because it's a subset of commentclose
if
doubledash
.
search
(
rawdata
,
i
+
4
,
res
.
start
(
0
)):
if
doubledash
.
search
(
rawdata
,
i
+
4
)
.
start
(
0
)
<
res
.
start
(
0
):
self
.
syntax_error
(
"`--' inside comment"
)
self
.
syntax_error
(
"`--' inside comment"
)
if
rawdata
[
res
.
start
(
0
)
-
1
]
==
'-'
:
self
.
syntax_error
(
'comment cannot end in three dashes'
)
if
illegal
.
search
(
rawdata
,
i
+
4
,
res
.
start
(
0
)):
self
.
syntax_error
(
'illegal character in comment'
)
self
.
handle_comment
(
rawdata
[
i
+
4
:
res
.
start
(
0
)])
self
.
handle_comment
(
rawdata
[
i
+
4
:
res
.
start
(
0
)])
return
res
.
end
(
0
)
return
res
.
end
(
0
)
...
@@ -291,28 +343,59 @@ class XMLParser:
...
@@ -291,28 +343,59 @@ class XMLParser:
rawdata
=
self
.
rawdata
rawdata
=
self
.
rawdata
n
=
len
(
rawdata
)
n
=
len
(
rawdata
)
name
=
res
.
group
(
'name'
)
name
=
res
.
group
(
'name'
)
pubid
,
syslit
=
res
.
group
(
'pubid'
,
'syslit'
)
if
pubid
is
not
None
:
pubid
=
pubid
[
1
:
-
1
]
# remove quotes
pubid
=
string
.
join
(
string
.
split
(
pubid
))
# normalize
if
syslit
is
not
None
:
syslit
=
syslit
[
1
:
-
1
]
# remove quotes
j
=
k
=
res
.
end
(
0
)
j
=
k
=
res
.
end
(
0
)
if
k
>=
n
:
return
-
1
if
rawdata
[
k
]
==
'['
:
level
=
0
level
=
0
k
=
k
+
1
dq
=
sq
=
0
while
k
<
n
:
while
k
<
n
:
c
=
rawdata
[
k
]
c
=
rawdata
[
k
]
if
c
==
'<'
:
if
not
sq
and
c
==
'"'
:
dq
=
not
dq
elif
not
dq
and
c
==
"'"
:
sq
=
not
sq
elif
sq
or
dq
:
pass
elif
level
<=
0
and
c
==
']'
:
res
=
endbracket
.
match
(
rawdata
,
k
+
1
)
if
not
res
:
return
-
1
self
.
handle_doctype
(
name
,
pubid
,
syslit
,
rawdata
[
j
+
1
:
k
])
return
res
.
end
(
0
)
elif
c
==
'<'
:
level
=
level
+
1
level
=
level
+
1
elif
c
==
'>'
:
elif
c
==
'>'
:
if
level
==
0
:
self
.
handle_doctype
(
name
,
rawdata
[
j
:
k
])
return
k
+
1
level
=
level
-
1
level
=
level
-
1
if
level
<
0
:
self
.
syntax_error
(
"bogus `>' in DOCTYPE"
)
k
=
k
+
1
k
=
k
+
1
res
=
endbracket
.
search
(
rawdata
,
k
)
if
not
res
:
return
-
1
return
-
1
if
res
.
start
(
0
)
!=
k
:
self
.
syntax_error
(
'garbage in DOCTYPE'
)
self
.
handle_doctype
(
name
,
pubid
,
syslit
,
None
)
return
res
.
end
(
0
)
# Internal -- handle CDATA tag, return length or -1 if not terminated
# Internal -- handle CDATA tag, return length or -1 if not terminated
def
parse_cdata
(
self
,
i
):
def
parse_cdata
(
self
,
i
):
rawdata
=
self
.
rawdata
rawdata
=
self
.
rawdata
if
rawdata
[
i
:
i
+
9
]
<>
'<![CDATA['
:
if
rawdata
[
i
:
i
+
9
]
<>
'<![CDATA['
:
raise
RuntimeError
,
'unexpected call to
handl
e_cdata'
raise
RuntimeError
,
'unexpected call to
pars
e_cdata'
res
=
cdataclose
.
search
(
rawdata
,
i
+
9
)
res
=
cdataclose
.
search
(
rawdata
,
i
+
9
)
if
not
res
:
if
not
res
:
return
-
1
return
-
1
if
illegal
.
search
(
rawdata
,
i
+
9
,
res
.
start
(
0
)):
self
.
syntax_error
(
'illegal character in CDATA'
)
if
not
self
.
stack
:
self
.
syntax_error
(
'CDATA not in content'
)
self
.
handle_cdata
(
rawdata
[
i
+
9
:
res
.
start
(
0
)])
self
.
handle_cdata
(
rawdata
[
i
+
9
:
res
.
start
(
0
)])
return
res
.
end
(
0
)
return
res
.
end
(
0
)
...
@@ -324,24 +407,15 @@ class XMLParser:
...
@@ -324,24 +407,15 @@ class XMLParser:
if
not
end
:
if
not
end
:
return
-
1
return
-
1
j
=
end
.
start
(
0
)
j
=
end
.
start
(
0
)
if
illegal
.
search
(
rawdata
,
i
+
2
,
j
):
self
.
syntax_error
(
'illegal character in processing instruction'
)
res
=
tagfind
.
match
(
rawdata
,
i
+
2
)
res
=
tagfind
.
match
(
rawdata
,
i
+
2
)
if
not
res
:
if
not
res
:
raise
RuntimeError
,
'unexpected call to parse_proc'
raise
RuntimeError
,
'unexpected call to parse_proc'
k
=
res
.
end
(
0
)
k
=
res
.
end
(
0
)
name
=
res
.
group
(
0
)
name
=
res
.
group
(
0
)
if
name
==
'xml'
:
if
string
.
find
(
string
.
lower
(
name
),
'xml'
)
>=
0
:
if
self
.
__at_start
:
self
.
syntax_error
(
'illegal processing instruction target name'
)
attrdict
,
k
=
self
.
parse_attributes
(
'xml'
,
k
,
j
,
self
.
__xml_attributes
)
if
k
!=
j
:
self
.
syntax_error
(
'garbage at end of <?xml?>'
)
if
attrdict
[
'version'
]
!=
'1.0'
:
self
.
syntax_error
(
'only XML version 1.0 supported'
)
self
.
handle_xml
(
attrdict
.
get
(
'encoding'
,
None
),
attrdict
[
'standalone'
])
return
end
.
end
(
0
)
else
:
self
.
syntax_error
(
"<?xml?> tag not at start of document"
)
self
.
handle_proc
(
name
,
rawdata
[
k
:
j
])
self
.
handle_proc
(
name
,
rawdata
[
k
:
j
])
return
end
.
end
(
0
)
return
end
.
end
(
0
)
...
@@ -375,6 +449,7 @@ class XMLParser:
...
@@ -375,6 +449,7 @@ class XMLParser:
(
attrname
,
tag
))
(
attrname
,
tag
))
if
attrdict
.
has_key
(
attrname
):
if
attrdict
.
has_key
(
attrname
):
self
.
syntax_error
(
'attribute specified twice'
)
self
.
syntax_error
(
'attribute specified twice'
)
attrvalue
=
string
.
translate
(
attrvalue
,
attrtrans
)
attrdict
[
attrname
]
=
self
.
translate_references
(
attrvalue
)
attrdict
[
attrname
]
=
self
.
translate_references
(
attrvalue
)
k
=
res
.
end
(
0
)
k
=
res
.
end
(
0
)
if
attributes
is
not
None
:
if
attributes
is
not
None
:
...
@@ -400,6 +475,8 @@ class XMLParser:
...
@@ -400,6 +475,8 @@ class XMLParser:
if
not
self
.
__seen_starttag
and
self
.
__seen_doctype
:
if
not
self
.
__seen_starttag
and
self
.
__seen_doctype
:
if
tag
!=
self
.
__seen_doctype
:
if
tag
!=
self
.
__seen_doctype
:
self
.
syntax_error
(
'starttag does not match DOCTYPE'
)
self
.
syntax_error
(
'starttag does not match DOCTYPE'
)
if
self
.
__seen_starttag
and
not
self
.
stack
:
self
.
syntax_error
(
'multiple elements on top level'
)
if
hasattr
(
self
,
tag
+
'_attributes'
):
if
hasattr
(
self
,
tag
+
'_attributes'
):
attributes
=
getattr
(
self
,
tag
+
'_attributes'
)
attributes
=
getattr
(
self
,
tag
+
'_attributes'
)
else
:
else
:
...
@@ -428,9 +505,6 @@ class XMLParser:
...
@@ -428,9 +505,6 @@ class XMLParser:
tag
=
res
.
group
(
0
)
tag
=
res
.
group
(
0
)
k
=
res
.
end
(
0
)
k
=
res
.
end
(
0
)
if
k
!=
end
.
start
(
0
):
if
k
!=
end
.
start
(
0
):
# check that there is only white space at end of tag
res
=
space
.
match
(
rawdata
,
k
)
if
res
is
None
or
res
.
end
(
0
)
!=
end
.
start
(
0
):
self
.
syntax_error
(
'garbage in end tag'
)
self
.
syntax_error
(
'garbage in end tag'
)
self
.
finish_endtag
(
tag
)
self
.
finish_endtag
(
tag
)
return
end
.
end
(
0
)
return
end
.
end
(
0
)
...
@@ -439,17 +513,18 @@ class XMLParser:
...
@@ -439,17 +513,18 @@ class XMLParser:
# Return -1 for unknown tag, 1 for balanced tag
# Return -1 for unknown tag, 1 for balanced tag
def
finish_starttag
(
self
,
tag
,
attrs
):
def
finish_starttag
(
self
,
tag
,
attrs
):
self
.
stack
.
append
(
tag
)
self
.
stack
.
append
(
tag
)
try
:
methodname
=
'start_'
+
tag
method
=
getattr
(
self
,
'start_'
+
tag
)
if
hasattr
(
self
,
methodname
):
except
AttributeError
:
method
=
getattr
(
self
,
methodname
)
self
.
unknown_starttag
(
tag
,
attrs
)
return
-
1
else
:
self
.
handle_starttag
(
tag
,
method
,
attrs
)
self
.
handle_starttag
(
tag
,
method
,
attrs
)
return
1
return
1
else
:
self
.
unknown_starttag
(
tag
,
attrs
)
return
-
1
# Internal -- finish processing of end tag
# Internal -- finish processing of end tag
def
finish_endtag
(
self
,
tag
):
def
finish_endtag
(
self
,
tag
):
methodname
=
'end_'
+
tag
if
not
tag
:
if
not
tag
:
self
.
syntax_error
(
'name-less end tag'
)
self
.
syntax_error
(
'name-less end tag'
)
found
=
len
(
self
.
stack
)
-
1
found
=
len
(
self
.
stack
)
-
1
...
@@ -459,9 +534,10 @@ class XMLParser:
...
@@ -459,9 +534,10 @@ class XMLParser:
else
:
else
:
if
tag
not
in
self
.
stack
:
if
tag
not
in
self
.
stack
:
self
.
syntax_error
(
'unopened end tag'
)
self
.
syntax_error
(
'unopened end tag'
)
try
:
if
hasattr
(
self
,
methodname
):
method
=
getattr
(
self
,
'end_'
+
tag
)
method
=
getattr
(
self
,
methodname
)
except
AttributeError
:
self
.
handle_endtag
(
tag
,
method
)
else
:
self
.
unknown_endtag
(
tag
)
self
.
unknown_endtag
(
tag
)
return
return
found
=
len
(
self
.
stack
)
found
=
len
(
self
.
stack
)
...
@@ -472,11 +548,8 @@ class XMLParser:
...
@@ -472,11 +548,8 @@ class XMLParser:
if
found
<
len
(
self
.
stack
)
-
1
:
if
found
<
len
(
self
.
stack
)
-
1
:
self
.
syntax_error
(
'missing close tag for
%
s'
%
self
.
stack
[
-
1
])
self
.
syntax_error
(
'missing close tag for
%
s'
%
self
.
stack
[
-
1
])
tag
=
self
.
stack
[
-
1
]
tag
=
self
.
stack
[
-
1
]
try
:
if
hasattr
(
self
,
methodname
):
method
=
getattr
(
self
,
'end_'
+
tag
)
method
=
getattr
(
self
,
methodname
)
except
AttributeError
:
method
=
None
if
method
:
self
.
handle_endtag
(
tag
,
method
)
self
.
handle_endtag
(
tag
,
method
)
else
:
else
:
self
.
unknown_endtag
(
tag
)
self
.
unknown_endtag
(
tag
)
...
@@ -487,7 +560,7 @@ class XMLParser:
...
@@ -487,7 +560,7 @@ class XMLParser:
pass
pass
# Overridable -- handle DOCTYPE
# Overridable -- handle DOCTYPE
def
handle_doctype
(
self
,
tag
,
data
):
def
handle_doctype
(
self
,
tag
,
pubid
,
syslit
,
data
):
pass
pass
# Overridable -- handle start tag
# Overridable -- handle start tag
...
@@ -514,7 +587,12 @@ class XMLParser:
...
@@ -514,7 +587,12 @@ class XMLParser:
self
.
handle_data
(
chr
(
n
))
self
.
handle_data
(
chr
(
n
))
# Definition of entities -- derived classes may override
# Definition of entities -- derived classes may override
entitydefs
=
{
'lt'
:
'<'
,
'gt'
:
'>'
,
'amp'
:
'&'
,
'quot'
:
'"'
,
'apos'
:
"'"
}
entitydefs
=
{
'lt'
:
'<'
,
# must use charref
'gt'
:
'>'
,
'amp'
:
'&'
,
# must use charref
'quot'
:
'"'
,
'apos'
:
'''
,
}
# Example -- handle entity reference, no need to override
# Example -- handle entity reference, no need to override
def
handle_entityref
(
self
,
name
):
def
handle_entityref
(
self
,
name
):
...
@@ -541,10 +619,6 @@ class XMLParser:
...
@@ -541,10 +619,6 @@ class XMLParser:
def
handle_proc
(
self
,
name
,
data
):
def
handle_proc
(
self
,
name
,
data
):
pass
pass
# Example -- handle special instructions, could be overridden
def
handle_special
(
self
,
data
):
pass
# Example -- handle relatively harmless syntax errors, could be overridden
# Example -- handle relatively harmless syntax errors, could be overridden
def
syntax_error
(
self
,
message
):
def
syntax_error
(
self
,
message
):
raise
RuntimeError
,
'Syntax error at line
%
d:
%
s'
%
(
self
.
lineno
,
message
)
raise
RuntimeError
,
'Syntax error at line
%
d:
%
s'
%
(
self
.
lineno
,
message
)
...
@@ -566,10 +640,14 @@ class TestXMLParser(XMLParser):
...
@@ -566,10 +640,14 @@ class TestXMLParser(XMLParser):
self
.
flush
()
self
.
flush
()
print
'xml: encoding ='
,
encoding
,
'standalone ='
,
standalone
print
'xml: encoding ='
,
encoding
,
'standalone ='
,
standalone
def
handle_doctype
(
self
,
tag
,
data
):
def
handle_doctype
(
self
,
tag
,
pubid
,
syslit
,
data
):
self
.
flush
()
self
.
flush
()
print
'DOCTYPE:'
,
tag
,
`data`
print
'DOCTYPE:'
,
tag
,
`data`
def
handle_entity
(
self
,
name
,
strval
,
pubid
,
syslit
,
ndata
):
self
.
flush
()
print
'ENTITY:'
,
`data`
def
handle_data
(
self
,
data
):
def
handle_data
(
self
,
data
):
self
.
testdata
=
self
.
testdata
+
data
self
.
testdata
=
self
.
testdata
+
data
if
len
(
`self.testdata`
)
>=
70
:
if
len
(
`self.testdata`
)
>=
70
:
...
@@ -589,10 +667,6 @@ class TestXMLParser(XMLParser):
...
@@ -589,10 +667,6 @@ class TestXMLParser(XMLParser):
self
.
flush
()
self
.
flush
()
print
'processing:'
,
name
,
`data`
print
'processing:'
,
name
,
`data`
def
handle_special
(
self
,
data
):
self
.
flush
()
print
'special:'
,
`data`
def
handle_comment
(
self
,
data
):
def
handle_comment
(
self
,
data
):
self
.
flush
()
self
.
flush
()
r
=
`data`
r
=
`data`
...
@@ -660,9 +734,13 @@ def test(args = None):
...
@@ -660,9 +734,13 @@ def test(args = None):
f
.
close
()
f
.
close
()
x
=
klass
()
x
=
klass
()
try
:
for
c
in
data
:
for
c
in
data
:
x
.
feed
(
c
)
x
.
feed
(
c
)
x
.
close
()
x
.
close
()
except
RuntimeError
,
msg
:
print
msg
sys
.
exit
(
1
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment