"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

# Version of this driver module.
version = "0.20"

from xml.sax._exceptions import *

try:
    from xml.parsers import expat
except ImportError:
    # Signal the missing expat bindings with the SAX exception type.
    raise SAXReaderNotAvailable("expat not supported", None)

from xml.sax import xmlreader, saxutils, handler

# Module-level aliases for the attribute container classes that the
# element handlers below hand to the content handler.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

import string

# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The driver acts as its own document locator: the Locator methods report
    the position of the currently active expat parser and the identifiers
    of the currently active input source.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver.

        namespaceHandling -- initial state of the SAX namespaces feature;
                             when true, reset() wires the *_ns element
                             handlers.
        bufsize -- passed through to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Pass the source's system id, if it has one, to expat as the base."
        system_id = source.getSystemId()
        if system_id is not None:
            self._parser.SetBase(system_id)

    def getFeature(self, name):
        """Return the state of feature `name`.

        Only handler.feature_namespaces is known; any other name raises
        SAXNotRecognizedException.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature `name` to `state`.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for any feature other than
        handler.feature_namespaces.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand a chunk of document text to expat.

        The first chunk of a document creates a fresh parser and fires
        startDocument().  expat errors are wrapped in a SAXParseException
        and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # When true it tells expat that this is the last piece of the
            # document.  Chunks fed by callers are not normally final -
            # only the empty chunk fed from close() is.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            self._err_handler.fatalError(exc)

    def close(self):
        "Finish the document: flush expat and fire endDocument()."
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0

    def reset(self):
        "Create a new expat parser, wire its handlers and clear parse state."
        if self._namespaces:
            # A single space separates namespace URI and local name in the
            # names expat reports; _ns_pair() splits on it.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # These two go straight to the content handler, bypassing the
        # processing_instruction()/character_data() methods below.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not wired: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's current column, or the Locator default before reset()."
        if self._parser is None:
            return xmlreader.Locator.getColumnNumber(self)
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's current line, or the Locator default before reset()."
        if self._parser is None:
            return xmlreader.Locator.getLineNumber(self)
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the current input source."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the current input source."
        return self._source.getSystemId()

    # event handlers

    def _ns_pair(self, name):
        "Turn an expat 'uri localname' string into a (uri, localname) tuple."
        parts = name.split()
        if len(parts) == 1:
            # No namespace: expat reported the bare name.
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        "Report a start tag with element and attribute names as NS pairs."
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._ns_pair(aname)] = value

        self._cont_handler.startElementNS(self._ns_pair(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        "Report an end tag using the same name shape as start_element_ns."
        self._cont_handler.endElementNS(self._ns_pair(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat passes sysid before pubid; the DTD handler takes pubid first.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        # expat passes sysid before pubid; the DTD handler takes pubid first.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 on success and 0 if parsing the entity raised.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the outer parser/source; close() is a no-op while the stack
        # is non-empty.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            # NOTE(review): the saved parser/source are not restored on this
            # path, so feed() goes on to read ErrorCode from the entity
            # parser.  Confirm whether that is intended before changing it.
            return 0  # FIXME: save error info here?

        self._parser, self._source = self._entity_stack.pop()
        return 1

# ---

def create_parser(*args, **kwargs):
    "Return a new ExpatParser, forwarding all arguments to its constructor."
    # Extended call syntax replaces apply(), which Python 3 removed.
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    # Demo: parse a sample document and echo it back as XML.
    import xml.sax
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils (imported at the top of
    # this file), not in the xml.sax package namespace.
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")