r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the writePlist(rootObject, pathOrFile)
function. 'rootObject' is the top level object, 'pathOrFile' is a
filename or a (writable) file object.

To parse a plist from a file, use the readPlist(pathOrFile) function,
with a file name or a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use readPlistFromBytes()
and writePlistToBytes().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data or datetime.datetime objects.
String values (including dictionary keys) have to be unicode strings -- they
will be written out as UTF-8.

The <data> plist type is supported through the Data class. This is a
thin wrapper around a Python bytes object. Use 'Data' if your strings
contain control characters.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = Data(b"<binary gunk>"),
        someMoreData = Data(b"<lots of binary gunk>" * 10),
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    writePlist(pl, fileName)

Parse Plist example:

    pl = readPlist(pathOrFile)
    print(pl["aKey"])
"""


# Names exported by a star-import of this module.
# Note: the Plist and Dict classes have been deprecated.
__all__ = [
    "readPlist",
    "writePlist",
    "readPlistFromBytes",
    "writePlistToBytes",
    "Plist",
    "Data",
    "Dict",
]

import binascii
import datetime
from io import BytesIO
import re


def readPlist(pathOrFile):
    """Read a .plist file and return the unpacked root object.

    'pathOrFile' may either be a file name (a str) or a (readable) file
    object. A file name is opened in binary mode and closed again before
    returning, even when parsing raises; a file object supplied by the
    caller is left open. The root object usually is a dictionary.
    """
    if isinstance(pathOrFile, str):
        # We opened the file, so we are responsible for closing it; the
        # context manager does that on both the success and error paths.
        with open(pathOrFile, 'rb') as fileobj:
            return PlistParser().parse(fileobj)
    return PlistParser().parse(pathOrFile)


def writePlist(rootObject, pathOrFile):
    """Write 'rootObject' to a .plist file.

    'pathOrFile' may either be a file name (a str) or a (writable) file
    object. A file name is opened in binary mode and closed again before
    returning, even when serialization raises; a file object supplied by
    the caller is left open.
    """
    if isinstance(pathOrFile, str):
        # We opened the file, so we close it; re-enter with the file object
        # so the serialization code below exists only once.
        with open(pathOrFile, 'wb') as fileobj:
            return writePlist(rootObject, fileobj)
    writer = PlistWriter(pathOrFile)
    writer.writeln("<plist version=\"1.0\">")
    writer.writeValue(rootObject)
    writer.writeln("</plist>")


def readPlistFromBytes(data):
    """Parse the plist held in the bytes object 'data'; return the root object."""
    stream = BytesIO(data)
    return readPlist(stream)


def writePlistToBytes(rootObject):
    """Serialize 'rootObject' in plist format and return the result as bytes."""
    buffer = BytesIO()
    writePlist(rootObject, buffer)
    serialized = buffer.getvalue()
    return serialized


class DumbXMLWriter:
    """Minimal XML writer that emits one indented line per call.

    'file' must accept bytes: every line is written UTF-8 encoded and is
    followed by a newline byte. 'indentLevel' is the starting nesting depth
    and 'indent' the unit repeated once per level in front of each line.
    """

    def __init__(self, file, indentLevel=0, indent="\t"):
        self.file = file
        self.stack = []             # names of the elements currently open
        self.indentLevel = indentLevel
        # writeln() only ever writes bytes, so a str indent (including the
        # default) is encoded once here; writing it as str would fail on a
        # binary stream.
        if isinstance(indent, str):
            indent = indent.encode('utf-8')
        self.indent = indent

    def beginElement(self, element):
        """Write the opening tag of 'element' and indent what follows."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self.indentLevel += 1

    def endElement(self, element):
        """Write the closing tag of 'element', which must be the innermost
        open element.
        """
        assert self.indentLevel > 0
        # Pop outside of the assert statement: asserts are stripped under
        # "python -O", which would otherwise leave the stack unpopped.
        opened = self.stack.pop()
        assert opened == element
        self.indentLevel -= 1
        self.writeln("</%s>" % element)

    def simpleElement(self, element, value=None):
        """Write '<element>value</element>' on one line, with 'value' passed
        through _escape(), or the empty tag '<element/>' when 'value' is
        None.
        """
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))
        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write 'line' (str or bytes) at the current indentation followed
        by a newline; an empty 'line' produces just the newline.
        """
        if line:
            # plist has fixed encoding of utf-8
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self.indentLevel * self.indent)
            self.file.write(line)
        self.file.write(b'\n')


# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z').  Smaller
# units may be omitted with a loss of precision.
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)


def _dateFromString(s):
    """Convert a plist date string into a naive datetime.datetime.

    The string has to match _dateParser. Trailing units may be left out;
    an omitted month or day is taken to be 1 and omitted time fields are
    left at datetime's defaults.

    Raises ValueError if 's' does not match the expected format.
    """
    match = _dateParser.match(s)
    if match is None:
        raise ValueError("invalid plist date: %r" % (s,))
    parts = []
    for name in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        text = match.group(name)
        if text is None:
            # The groups are nested, so everything after the first missing
            # unit is missing as well.
            break
        parts.append(int(text))
    # datetime() requires year, month and day; pad a reduced-precision
    # date ("YYYYZ" or "YYYY-MMZ") with the first month/day.
    while len(parts) < 3:
        parts.append(1)
    return datetime.datetime(*parts)

def _dateToString(d):
    """Format the datetime 'd' as 'YYYY-MM-DDTHH:MM:SSZ', zero-padding every
    field; anything finer than whole seconds is not written.
    """
    fields = (d.year, d.month, d.day, d.hour, d.minute, d.second)
    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % fields


# Regex to find any control chars, except for \t \n and \r
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")


def _escape(text):
    """Return 'text' made safe for use as XML character data.

    Line endings are normalized to a line feed and the characters '&',
    '<' and '>' are replaced by their entities. Raises ValueError if
    'text' contains a control character matched by _controlCharPat.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use plistlib.Data instead")
    # Applied strictly in this order: "\r\n" before the lone "\r", and "&"
    # before the replacements that themselves introduce an ampersand.
    substitutions = (
        ("\r\n", "\n"),     # DOS line endings
        ("\r", "\n"),       # Mac line endings
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for needle, replacement in substitutions:
        text = text.replace(needle, replacement)
    return text


# XML declaration and DOCTYPE line; PlistWriter writes this to the file
# first unless it is created with a false 'writeHeader'.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""

class PlistWriter(DumbXMLWriter):
    """Writer that serializes Python objects as plist XML elements.

    'file' must accept bytes. Unless 'writeHeader' is false, PLISTHEADER is
    written to it as soon as the writer is created. 'indent' is expected to
    be a bytes object (writeData() relies on that).
    """

    def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1):
        if writeHeader:
            file.write(PLISTHEADER)
        DumbXMLWriter.__init__(self, file, indentLevel, indent)

    def writeValue(self, value):
        """Write 'value' as the plist element matching its Python type.

        Raises TypeError for a type that has no plist representation.
        """
        if isinstance(value, str):
            self.simpleElement("string", value)
        elif isinstance(value, bool):
            # must switch for bool before int, as bool is a
            # subclass of int...
            if value:
                self.simpleElement("true")
            else:
                self.simpleElement("false")
        elif isinstance(value, int):
            self.simpleElement("integer", "%d" % value)
        elif isinstance(value, float):
            self.simpleElement("real", repr(value))
        elif isinstance(value, dict):
            self.writeDict(value)
        elif isinstance(value, Data):
            self.writeData(value)
        elif isinstance(value, datetime.datetime):
            self.simpleElement("date", _dateToString(value))
        elif isinstance(value, (tuple, list)):
            self.writeArray(value)
        else:
            raise TypeError("unsupported type: %s" % type(value))

    def writeData(self, data):
        """Write the Data object 'data' as a <data> element holding
        base64 lines.
        """
        self.beginElement("data")
        # The base64 lines are written at the same depth as the <data> tag
        # itself, so undo the indentation beginElement() just added.
        self.indentLevel -= 1
        # Aim for 76 columns including the indentation (a tab counted as 8
        # columns), but keep a sane minimum: at deep nesting the difference
        # reaches zero or goes negative, which would make the encoder raise
        # or silently emit nothing.
        indentWidth = len(self.indent.replace(b"\t", b" " * 8) *
                          self.indentLevel)
        maxlinelength = max(16, 76 - indentWidth)
        for line in data.asBase64(maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self.indentLevel += 1
        self.endElement("data")

    def writeDict(self, d):
        """Write the dictionary 'd' as a <dict> element, entries sorted.

        Raises TypeError if a key is not a string.
        """
        self.beginElement("dict")
        items = sorted(d.items())
        for key, value in items:
            if not isinstance(key, str):
                raise TypeError("keys must be strings")
            self.simpleElement("key", key)
            self.writeValue(value)
        self.endElement("dict")

    def writeArray(self, array):
        """Write the sequence 'array' as an <array> element."""
        self.beginElement("array")
        for value in array:
            self.writeValue(value)
        self.endElement("array")


class _InternalDict(dict):
    """dict that also offers its keys as attributes (deprecated access).

    Reading, setting or deleting a key through attribute notation works
    but emits a DeprecationWarning.
    """

    # This class is needed while Dict is scheduled for deprecation:
    # we only need to warn when a *user* instantiates Dict or when
    # the "attribute notation for dict keys" is used.

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails; a missing key is
        # reported as AttributeError, without a deprecation warning.
        try:
            value = self[attr]
        except KeyError:
            raise AttributeError(attr) from None
        from warnings import warn
        warn("Attribute access from plist dicts is deprecated, use d[key] "
             "notation instead", DeprecationWarning, 2)
        return value

    def __setattr__(self, attr, value):
        from warnings import warn
        warn("Attribute access from plist dicts is deprecated, use d[key] "
             "notation instead", DeprecationWarning, 2)
        self[attr] = value

    def __delattr__(self, attr):
        try:
            del self[attr]
        except KeyError:
            raise AttributeError(attr) from None
        from warnings import warn
        warn("Attribute access from plist dicts is deprecated, use d[key] "
             "notation instead", DeprecationWarning, 2)

class Dict(_InternalDict):
    """Deprecated dict subclass; instantiating it emits a
    DeprecationWarning. Use the builtin dict instead.
    """

    def __init__(self, **kwargs):
        from warnings import warn
        warn("The plistlib.Dict class is deprecated, use builtin dict instead",
             DeprecationWarning, 2)
        super().__init__(**kwargs)


class Plist(_InternalDict):

    """This class has been deprecated. Use readPlist() and writePlist()
    functions instead, together with regular dict objects.
    """

    def __init__(self, **kwargs):
        from warnings import warn
        warn("The Plist class is deprecated, use the readPlist() and "
             "writePlist() functions instead", DeprecationWarning, 2)
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the readPlist() function instead."""
        rootObject = readPlist(pathOrFile)
        # Creating the instance goes through __init__ and therefore emits
        # the deprecation warning.
        plist = cls()
        plist.update(rootObject)
        return plist

    def write(self, pathOrFile):
        """Deprecated. Use the writePlist() function instead."""
        writePlist(self, pathOrFile)


def _encodeBase64(s, maxlinelength=76):
    """Return the bytes 's' base64-encoded as newline-terminated lines.

    Each line holds at most 'maxlinelength' base64 characters, rounded down
    to a multiple of 4 and never fewer than 4. Empty input gives b''.
    """
    # copied from base64.encodebytes(), with added maxlinelength argument
    # Every 4 output characters encode 3 input bytes. Clamp the chunk size
    # to one such group: a zero step makes range() raise and a negative
    # step would yield no chunks at all, silently dropping the data.
    maxbinsize = max(3, (maxlinelength // 4) * 3)
    return b''.join(
        binascii.b2a_base64(s[i:i + maxbinsize])
        for i in range(0, len(s), maxbinsize)
    )

class Data:

    """Wrapper for binary data.

    The wrapped bytes object is available as the 'data' attribute.
    """

    def __init__(self, data):
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Create an instance from the base64-encoded bytes 'data'."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(binascii.a2b_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the payload base64-encoded via _encodeBase64()."""
        return _encodeBase64(self.data, maxlinelength)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            # The payload is always bytes, so a raw bytes object is the
            # only plain value it can meaningfully equal (comparing with
            # str could never succeed).
            return self.data == other
        else:
            # Let Python fall back to its default (identity) comparison.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))

class PlistParser:
    """Expat-based parser that rebuilds the object tree of an XML plist.

    Element events are dispatched to 'begin_<tag>' / 'end_<tag>' methods
    when such a method exists; elements without a handler are ignored.
    """

    def __init__(self):
        self.stack = []         # containers (dicts / lists) currently open
        self.currentKey = None  # text of the last <key> still awaiting a value
        self.root = None        # top-level object of the document
        self.data = []          # character data chunks of the current element

    def parse(self, fileobj):
        """Parse the plist read from 'fileobj' and return its root object."""
        from xml.parsers.expat import ParserCreate
        self.parser = ParserCreate()
        self.parser.StartElementHandler = self.handleBeginElement
        self.parser.EndElementHandler = self.handleEndElement
        self.parser.CharacterDataHandler = self.handleData
        self.parser.ParseFile(fileobj)
        return self.root

    def handleBeginElement(self, element, attrs):
        # Every start tag discards the character data collected so far.
        self.data = []
        handler = getattr(self, "begin_" + element, None)
        if handler is not None:
            handler(attrs)

    def handleEndElement(self, element):
        handler = getattr(self, "end_" + element, None)
        if handler is not None:
            handler()

    def handleData(self, data):
        self.data.append(data)

    def addObject(self, value):
        """Attach 'value' to the innermost open container, or make it the
        root object when no container is open.

        Raises ValueError if the value is not allowed at this position.
        """
        if self.currentKey is not None:
            if not isinstance(self.stack[-1], dict):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1][self.currentKey] = value
            self.currentKey = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            if not isinstance(self.stack[-1], list):
                raise ValueError("unexpected element at line %d" %
                                 self.parser.CurrentLineNumber)
            self.stack[-1].append(value)

    def getData(self):
        """Return the collected character data as one string and reset it."""
        data = ''.join(self.data)
        self.data = []
        return data

    # element handlers

    def begin_dict(self, attrs):
        d = _InternalDict()
        self.addObject(d)
        self.stack.append(d)

    def end_dict(self):
        if self.currentKey:
            raise ValueError("missing value for key '%s' at line %d" %
                             (self.currentKey, self.parser.CurrentLineNumber))
        self.stack.pop()

    def end_key(self):
        # A key is only valid directly inside a dict and only when the
        # previous key already received its value. The empty-stack check
        # reports a <key> outside any container as ValueError instead of
        # failing with an IndexError.
        if (self.currentKey or not self.stack
                or not isinstance(self.stack[-1], dict)):
            raise ValueError("unexpected key at line %d" %
                             self.parser.CurrentLineNumber)
        self.currentKey = self.getData()

    def begin_array(self, attrs):
        a = []
        self.addObject(a)
        self.stack.append(a)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.addObject(True)

    def end_false(self):
        self.addObject(False)

    def end_integer(self):
        self.addObject(int(self.getData()))

    def end_real(self):
        self.addObject(float(self.getData()))

    def end_string(self):
        self.addObject(self.getData())

    def end_data(self):
        self.addObject(Data.fromBase64(self.getData().encode("utf-8")))

    def end_date(self):
        self.addObject(_dateFromString(self.getData()))