Commit 1f54902e, authored by Fred Drake

Conform to the Python style guide.

parent ddb48674
import pulldom
import string
from StringIO import StringIO
import types
"""
"""\
minidom.py -- a lightweight DOM implementation based on SAX.
parse( "foo.xml" )
......@@ -19,6 +14,11 @@ Todo:
* SAX 2 namespaces
"""
import pulldom
import string
from StringIO import StringIO
import types
class Node:
ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
......@@ -33,229 +33,234 @@ class Node:
DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12
allnodes={}
_debug=0
_makeParentNodes=1
debug=None
allnodes = {}
_debug = 0
_makeParentNodes = 1
debug = None
def __init__(self):
    """Start the node with an empty child list.

    When Node._debug is set, the node is also recorded in Node.allnodes
    and a "create" line is written to the shared Node.debug buffer, which
    is created on first use.
    """
    self.childNodes = []
    if not Node._debug:
        return
    key = "%s%s" % (repr(id(self)), repr(self.__class__))
    Node.allnodes[key] = repr(self.__dict__)
    if Node.debug is None:
        Node.debug = StringIO()
    Node.debug.write("create %s\n" % key)
# Fallback attribute lookup that maps attributes onto "_get_<name>" accessors.
#
# * Names starting with "__" are rejected immediately with AttributeError.
# * An "inGetAttr" entry in the instance dictionary marks a lookup that is
#   already in progress; a nested lookup removes the marker and raises
#   AttributeError instead of recursing.
# * For a key of the form "_get_<name>", a zero-argument callable returning
#   getattr(self, <name>) is returned when <name> exists on the instance.
# * For any other key, the "_get_<key>" accessor is looked up and its result
#   is returned.
def __getattr__( self, key ):
if key[0:2]=="__": raise AttributeError
def __getattr__(self, key):
if key[0:2] == "__":
raise AttributeError
# getattr should never call getattr!
if self.__dict__.has_key("inGetAttr"):
del self.inGetAttr
raise AttributeError, key
prefix,attrname=key[:5],key[5:]
if prefix=="_get_":
self.inGetAttr=1
if hasattr( self, attrname ):
prefix, attrname = key[:5], key[5:]
if prefix == "_get_":
# Set the marker before probing so the hasattr() below cannot recurse.
self.inGetAttr = 1
if hasattr(self, attrname):
del self.inGetAttr
# self and attrname are bound as lambda defaults.
return (lambda self=self, attrname=attrname:
getattr( self, attrname ))
getattr(self, attrname))
else:
del self.inGetAttr
raise AttributeError, key
else:
self.inGetAttr=1
self.inGetAttr = 1
try:
func = getattr( self, "_get_"+key )
func = getattr(self, "_get_" + key)
except AttributeError:
# Re-raise under the name the caller actually asked for.
raise AttributeError, key
del self.inGetAttr
return func()
def __nonzero__(self): return 1
def __nonzero__(self):
return 1
def toxml(self):
    """Return the text that self.writexml() writes, as one string."""
    buf = StringIO()
    self.writexml(buf)
    xml_text = buf.getvalue()
    return xml_text
def hasChildNodes(self):
    """Return 1 if the child list is non-empty, otherwise 0."""
    if not self.childNodes:
        return 0
    return 1
def _get_firstChild( self ):
def _get_firstChild(self):
return self.childNodes[0]
def _get_lastChild( self ):
def _get_lastChild(self):
return self.childNodes[-1]
def insertBefore(self, newChild, refChild):
    """Insert newChild immediately before refChild and return newChild.

    A refChild of None appends newChild at the end of the child list.
    Raises ValueError when a non-None refChild is not a child of this node.
    The inserted node's parentNode is set when _makeParentNodes is true.
    """
    if refChild is None:
        self.childNodes.append(newChild)
    else:
        position = self.childNodes.index(refChild)
        self.childNodes.insert(position, newChild)
    if self._makeParentNodes:
        newChild.parentNode = self
    # Hand the inserted node back, as appendChild() does.
    return newChild
def appendChild(self, node):
    """Add node to the end of the child list and return it."""
    children = self.childNodes
    children.append(node)
    return node
def replaceChild(self, newChild, oldChild):
    """Put newChild in the position held by oldChild and return oldChild.

    Raises ValueError when oldChild is not a child of this node.  The new
    child's parentNode is set when _makeParentNodes is true, matching
    insertBefore().
    """
    position = self.childNodes.index(oldChild)
    # The previous code stored oldChild back into the slot, so the
    # replacement never took effect.
    self.childNodes[position] = newChild
    if self._makeParentNodes:
        newChild.parentNode = self
    return oldChild
def removeChild(self, oldChild):
    """Delete oldChild from the child list.

    Raises ValueError when oldChild is not present.
    """
    children = self.childNodes
    position = children.index(oldChild)
    del children[position]
def cloneNode(self, deep):
    """Return a copy of this node that has no parent.

    deep -- when true, the children are cloned recursively; otherwise the
            clone starts with an empty child list.

    If the node keeps attribute nodes in _attrs/_attrsNS, the clone gets
    its own dictionaries filled with clones of those attribute nodes.
    """
    import copy
    # copy.copy() gives the clone its own instance dictionary without
    # running __init__, so rebinding attributes on the clone cannot alter
    # the original node.
    clone = copy.copy(self)
    clone.parentNode = None
    own_attrs = clone.__dict__.get("_attrs")
    if own_attrs is not None:
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in own_attrs.values():
            attr_copy = attr.cloneNode(0)
            clone._attrs[attr_copy.name] = attr_copy
            clone._attrsNS[(attr_copy.namespaceURI,
                            attr_copy.localName)] = attr_copy
    children = []
    if deep:
        for child in self.childNodes:
            # The method has to be called; the previous code collected
            # the bound methods themselves.
            child_copy = child.cloneNode(deep)
            if self._makeParentNodes:
                child_copy.parentNode = clone
            children.append(child_copy)
    clone.childNodes = children
    return clone
def unlink(self):
    """Detach this node from its parent, children and attributes.

    Children are unlinked from last to first, then childNodes is set to
    None.  If self.attributes is true, every attribute node in _attrs is
    removed via removeAttributeNode().  With Node._debug set, the node's
    entry in Node.allnodes is deleted and a "Deleting" line is logged.
    """
    self.parentNode = None
    kids = self.childNodes
    while kids:
        last = kids[-1]
        last.unlink()
        del kids[-1]  # probably not most efficient!
    self.childNodes = None
    if self.attributes:
        for attr in self._attrs.values():
            self.removeAttributeNode(attr)
        assert not len(self._attrs)
        assert not len(self._attrsNS)
    if not Node._debug:
        return
    key = repr(id(self)) + repr(self.__class__)
    self.debug.write("Deleting: %s\n" % key)
    del Node.allnodes[key]
def _write_data( writer, data):
def _write_data(writer, data):
"Writes datachars to writer."
data=string.replace(data,"&","&")
data=string.replace(data,"<","&lt;")
data=string.replace(data,"\"","&quot;")
data=string.replace(data,">","&gt;")
data = string.replace(data, "&", "&amp;")
data = string.replace(data, "<", "&lt;")
data = string.replace(data, "\"", "&quot;")
data = string.replace(data, ">", "&gt;")
writer.write(data)
def _getElementsByTagNameHelper(parent, name, rc):
    """Append matching descendant elements of parent to rc and return rc.

    An element matches when name is "*" or equals its tagName.  Every
    child is descended into, in document order.
    """
    match_any = (name == "*")
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if match_any or child.tagName == name:
                rc.append(child)
        _getElementsByTagNameHelper(child, name, rc)
    return rc
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Append matching descendant elements of parent to rc and return rc.

    An element matches when localName is "*" or equals its localName,
    and nsURI is "*" or equals its namespaceURI.  Only element children
    are descended into.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare the local part: tagName holds the qualified name.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            # The recursive call previously passed an undefined name
            # and one argument too few.
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
class Attr(Node):
    """Attribute node; "value" and "nodeValue" are kept identical."""

    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Write through the instance dictionary to skip __setattr__
        # (performance).
        state = self.__dict__
        state["localName"] = localName or qName
        state["name"] = qName
        state["nodeName"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        state = self.__dict__
        if name not in ("value", "nodeValue"):
            state[name] = value
            return
        # Assigning either alias updates both.
        state["value"] = value
        state["nodeValue"] = value
class AttributeList:
"""the attribute list is a transient interface to the underlying
dictionaries. mutations here will change the underlying element's
dictionary"""
def __init__( self, attrs, attrsNS ):
self._attrs=attrs
self._attrsNS=attrsNS
self.length=len( self._attrs.keys() )
def item( self, index ):
dictionaries. mutations here will change the underlying element's
dictionary"""
def __init__(self, attrs, attrsNS):
self._attrs = attrs
self._attrsNS = attrsNS
self.length = len(self._attrs.keys())
def item(self, index):
try:
return self[self.keys()[index]]
except IndexError:
return None
def items( self ):
return map( lambda node: (node.tagName, node.value),
self._attrs.values() )
def itemsNS( self ):
return map( lambda node: ((node.URI, node.localName), node.value),
self._attrs.values() )
def items(self):
return map(lambda node: (node.tagName, node.value),
self._attrs.values())
def itemsNS(self):
return map(lambda node: ((node.URI, node.localName), node.value),
self._attrs.values())
def keys( self ):
def keys(self):
return self._attrs.keys()
def keysNS( self ):
def keysNS(self):
return self._attrsNS.keys()
def values( self ):
def values(self):
return self._attrs.values()
def __len__( self ):
def __len__(self):
return self.length
def __cmp__( self, other ):
if self._attrs is getattr( other, "_attrs", None ):
def __cmp__(self, other):
if self._attrs is getattr(other, "_attrs", None):
return 0
else:
return cmp( id( self ), id( other ) )
return cmp(id(self), id(other))
#FIXME: is it appropriate to return .value?
def __getitem__( self, attname_or_tuple ):
if type( attname_or_tuple ) == types.TupleType:
def __getitem__(self, attname_or_tuple):
if type(attname_or_tuple) is types.TupleType:
return self._attrsNS[attname_or_tuple]
else:
return self._attrs[attname_or_tuple]
# same as set
def __setitem__( self, attname, value ):
if type( value ) == types.StringType:
node=Attr( attname )
def __setitem__(self, attname, value):
if type(value) is types.StringType:
node = Attr(attname)
node.value=value
else:
assert isinstance( value, Attr ) or type( value )==types.StringType
node=value
old=self._attrs.get( attname, None)
assert isinstance(value, Attr) or type(value) is types.StringType
node = value
old = self._attrs.get(attname, None)
if old:
old.unlink()
self._attrs[node.name]=node
self._attrsNS[(node.namespaceURI,node.localName)]=node
self._attrs[node.name] = node
self._attrsNS[(node.namespaceURI, node.localName)] = node
def __delitem__( self, attname_or_tuple ):
node=self[attname_or_tuple]
def __delitem__(self, attname_or_tuple):
node = self[attname_or_tuple]
node.unlink()
del self._attrs[node.name]
del self._attrsNS[(node.namespaceURI, node.localName)]
class Element( Node ):
nodeType=Node.ELEMENT_NODE
def __init__( self, tagName, namespaceURI="", prefix="",
localName=None ):
Node.__init__( self )
nodeType = Node.ELEMENT_NODE
def __init__(self, tagName, namespaceURI="", prefix="",
localName=None):
Node.__init__(self)
self.tagName = self.nodeName = tagName
self.localName=localName or tagName
self.prefix=prefix
self.namespaceURI=namespaceURI
self.nodeValue=None
self.localName = localName or tagName
self.prefix = prefix
self.namespaceURI = namespaceURI
self.nodeValue = None
self._attrs={} # attributes are double-indexed:
self._attrsNS={}# tagName -> Attribute
......@@ -264,191 +269,195 @@ class Element( Node ):
# this is too tricky for now because of headaches
# with namespaces.
def getAttribute(self, attname):
    """Return the value of the attribute named attname (KeyError if absent)."""
    attr = self._attrs[attname]
    return attr.value
def getAttributeNS(self, namespaceURI, localName):
    """Return the value stored under (namespaceURI, localName)."""
    key = (namespaceURI, localName)
    attr = self._attrsNS[key]
    return attr.value
def setAttribute(self, attname, value):
    """Create an Attr named attname holding value and attach it."""
    attr = Attr(attname)
    # Write both aliases straight into the instance dictionary
    # (performance: bypasses Attr.__setattr__).
    state = attr.__dict__
    state["value"] = value
    state["nodeValue"] = value
    self.setAttributeNode(attr)
def setAttributeNS(self, namespaceURI, qualifiedName, value):
    """Create a namespaced Attr holding value and attach it."""
    parts = _nssplit(qualifiedName)
    prefix, localname = parts
    attr = Attr(qualifiedName, namespaceURI, localname, prefix)
    # Write both aliases straight into the instance dictionary
    # (performance: bypasses Attr.__setattr__).
    state = attr.__dict__
    state["value"] = value
    state["nodeValue"] = value
    self.setAttributeNode(attr)
def getAttributeNode(self, attrname):
    """Return the attribute node named attrname, or None if absent."""
    by_name = self._attrs
    return by_name.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
    """Return the node stored under (namespaceURI, localName).

    Raises KeyError when no such attribute exists.
    """
    key = (namespaceURI, localName)
    return self._attrsNS[key]
def setAttributeNode(self, attr):
    """Register attr in both indexes, unlinking any node it replaces."""
    previous = self._attrs.get(attr.name, None)
    if previous:
        previous.unlink()
    ns_key = (attr.namespaceURI, attr.localName)
    self._attrs[attr.name] = attr
    self._attrsNS[ns_key] = attr
def removeAttribute(self, name):
    """Remove the attribute called name (KeyError if absent)."""
    target = self._attrs[name]
    self.removeAttributeNode(target)
def removeAttributeNS(self, namespaceURI, localName):
    """Remove the attribute stored under (namespaceURI, localName)."""
    key = (namespaceURI, localName)
    target = self._attrsNS[key]
    self.removeAttributeNode(target)
def removeAttributeNode(self, node):
    """Unlink node, then drop it from both attribute indexes."""
    node.unlink()
    ns_key = (node.namespaceURI, node.localName)
    del self._attrs[node.name]
    del self._attrsNS[ns_key]
def getElementsByTagName(self, name):
    """Return what _getElementsByTagNameHelper yields for a fresh list."""
    found = []
    return _getElementsByTagNameHelper(self, name, found)
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Return the list filled in by _getElementsByTagNameNSHelper.

    The previous code discarded the result and always returned None.
    """
    rc = []
    # Return the accumulator itself rather than relying on the helper's
    # return value.
    _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
    return rc
def __repr__( self ):
return "<DOM Element:"+self.tagName+" at "+`id( self )` +" >"
def __repr__(self):
return "<DOM Element: %s at %s>" % (self.tagName, id(self))
def writexml(self, writer):
    """Write this element, its attributes and its children to writer.

    Attributes are written in sorted name order with escaped values.
    An element without children is written in the empty-tag form.
    """
    writer.write("<" + self.tagName)

    attrs = self._get_attributes()
    a_names = attrs.keys()
    a_names.sort()

    for a_name in a_names:
        writer.write(" %s=\"" % a_name)
        # Escape the attribute's string value; the lookup itself returns
        # the attribute node.
        _write_data(writer, attrs[a_name].value)
        writer.write("\"")
    if self.childNodes:
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer)
        writer.write("</%s>" % self.tagName)
    else:
        writer.write("/>")
def _get_attributes(self):
    """Return a new AttributeList over this element's attribute indexes."""
    by_name = self._attrs
    by_ns = self._attrsNS
    return AttributeList(by_name, by_ns)
class Comment(Node):
    """Comment node; data and nodeValue both hold the comment text."""

    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        """Write the comment text between the comment delimiters."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
class ProcessingInstruction(Node):
    """Processing-instruction node made of a target and its data."""

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        """Write the instruction as target, one space, then data."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
class Text(Node):
    """Text node; data and nodeValue both hold the character data."""

    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        """Show the first ten characters, with "..." if more follow."""
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], suffix)

    def writexml(self, writer):
        """Write the escaped character data to writer."""
        _write_data(writer, self.data)
def _nssplit( qualifiedName ):
fields = string.split(qualifiedName, ':')
def _nssplit(qualifiedName):
fields = qualifiedName.split(':', 1)
if len(fields) == 2:
return fields
elif len(fields) == 1:
return( '', fields[0] )
class Document(Node):
    """Document node: owns at most one element child and acts as the
    factory for the other node types."""

    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node and return it.

        The first element child becomes documentElement; a second one
        raises TypeError.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    # The node classes themselves serve as the factory callables.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        # Attr takes the qualified name first, then the namespace URI.
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the list filled in by _getElementsByTagNameNSHelper."""
        # The helper needs an accumulator, and the result must be
        # returned; both were missing before.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return the list filled in by _getElementsByTagNameHelper."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child node to writer, in order."""
        for node in self.childNodes:
            node.writexml(writer)
def _doparse( func, args, kwargs ):
events=apply( func, args, kwargs )
(toktype, rootNode)=events.getEvent()
events.expandNode( rootNode )
def _doparse(func, args, kwargs):
events = apply(func, args, kwargs)
toktype, rootNode = events.getEvent()
events.expandNode(rootNode)
return rootNode
def parse(*args, **kwargs):
    "Build a DOM from a filename or file object; arguments go to pulldom.parse."
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
def parseString(*args, **kwargs):
    "Build a DOM from a string; arguments go to pulldom.parseString."
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)
import minidom
import types
import string
import sys
import xml.sax
#todo: SAX2/namespace handling
# Event type tokens: the first item of every (token, node) event pair.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
class PullDOM:
    """Content handler that turns parser callbacks into a queue of
    (token, node) events.

    The queue is a singly linked list of two-item lists [event, next].
    firstEvent is a dummy head cell and lastEvent is the current tail.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _queue(self, token, node):
        # Append one (token, node) event and advance the tail.
        self.lastEvent[1] = [(token, node), None]
        self.lastEvent = self.lastEvent[1]

    def _attach(self, node):
        # Point node at the current parent and chain it after the
        # parent's last existing child, if there is one.
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def setDocumentLocator(self, locator): pass

    def startElement(self, name, tagName, attrs):
        if not hasattr(self, "curNode"):
            # FIXME: hack!
            self.startDocument()

        node = self.document.createElement(tagName) #FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._attach(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._queue(START_ELEMENT, node)

    def endElement(self, name, tagName):
        node = self.curNode
        self._queue(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        node = self.document.createComment(s)
        self._attach(node)
        self._queue(COMMENT, node)

    def processingInstruction(self, target, data):
        node = self.document.createProcessingInstruction(target, data)
        self._attach(node)
        self._queue(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        # chars is the complete whitespace string; the previous slice
        # used start/length names that this method never receives.
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._queue(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        # Only the parent link is set here; sibling links are left alone,
        # as before.
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._queue(CHARACTERS, node)

    def startDocument(self):
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._queue(START_DOCUMENT, node)

    def endDocument(self):
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # Queue the document itself.  The loop variable used before was
        # undefined for an empty document and otherwise named the last
        # child.  The tail is deliberately not advanced, as before.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
class ErrorHandler:
    """Error callbacks: warnings are printed, errors are raised."""

    def warning(self, exception):
        """Print the warning and carry on."""
        print(exception)

    def error(self, exception):
        """Raise the given exception."""
        raise exception

    def fatalError(self, exception):
        """Raise the given exception."""
        raise exception
class DOMEventStream:
    """Feeds a stream to a parser in bufsize chunks and hands out the
    (token, node) events queued by a PullDOM content handler."""

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        handler = PullDOM()
        self.pulldom = handler
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event, ignoring pos; IndexError at the end."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one refers to node itself.

        The node of every consumed event other than END_ELEMENT is
        appended to its parentNode.
        """
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the next queued event, reading and parsing more input
        as needed; None once the stream has no more data."""
        handler = self.pulldom
        head = handler.firstEvent
        if not head[1]:
            # Queue is empty: move the tail back to the dummy head.
            handler.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
# FIXME: sax2
......@@ -168,27 +166,25 @@ class DOMEventStream:
def _getParser():
return xml.sax.make_parser()
# Default number of characters read from the stream per parser feed.
default_bufsize = 2 ** 14 - 20
# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    A plain string argument is opened as a file name; anything else is
    used as the stream directly.  A parser is created when none is given.
    """
    stream = stream_or_string
    if type(stream) is type(""):
        stream = open(stream)
    if not parser:
        parser = _getParser()
    return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
    """Return a DOMEventStream over the contents of string.

    The whole string is offered to the parser in a single read.  A
    parser is created only when the caller does not supply one.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    # Honour a caller-supplied parser, as parse() does; it was previously
    # always replaced.
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment