mailbox.py 74.1 KB
Newer Older
1
#! /usr/bin/env python3
2

3
"""Read/write support for Maildir, mbox, MH, Babyl, and MMDF mailboxes."""
4

5 6 7 8 9 10
# Notes for authors of new mailbox subclasses:
#
# Remember to fsync() changes to disk before closing a modified file
# or returning from a flush() method.  See functions _sync_flush() and
# _sync_close().

11
import sys
Jack Jansen's avatar
Jack Jansen committed
12
import os
13 14 15 16 17
import time
import calendar
import socket
import errno
import copy
18
import warnings
19
import email
20 21
import email.message
import email.generator
22
import io
23
import contextlib
24
try:
25 26 27 28
    if sys.platform == 'os2emx':
        # OS/2 EMX fcntl() not adequate
        raise ImportError
    import fcntl
29 30
except ImportError:
    fcntl = None
31

32 33
__all__ = [ 'Mailbox', 'Maildir', 'mbox', 'MH', 'Babyl', 'MMDF',
            'Message', 'MaildirMessage', 'mboxMessage', 'MHMessage',
Benjamin Peterson's avatar
Benjamin Peterson committed
34
            'BabylMessage', 'MMDFMessage']
35

36 37
# Platform line separator as bytes.  Message data is converted from b'\n' to
# this separator when written to mailbox files in this module, and back to
# b'\n' when read.
linesep = os.linesep.encode('ascii')

38 39
class Mailbox:
    """A group of messages in a particular place.

    Storage-specific operations raise NotImplementedError here and must be
    supplied by subclasses; the dictionary-style conveniences (get, keys,
    values, items, pop, popitem, update, ...) are implemented in this class
    on top of those operations.
    """

    def __init__(self, path, factory=None, create=True):
        """Initialize a Mailbox instance.

        path has a leading '~' expanded and is made absolute.  factory, if
        given, is called by __getitem__ with a file-like object for the
        message.  create is accepted for the benefit of subclasses and is
        not used by this base initializer.
        """
        self._path = os.path.abspath(os.path.expanduser(path))
        self._factory = factory

    def add(self, message):
        """Add message and return assigned key."""
        raise NotImplementedError('Method must be implemented by subclass')

    def remove(self, key):
        """Remove the keyed message; raise KeyError if it doesn't exist."""
        raise NotImplementedError('Method must be implemented by subclass')

    def __delitem__(self, key):
        """Remove the keyed message (``del mailbox[key]``)."""
        self.remove(key)

    def discard(self, key):
        """If the keyed message exists, remove it."""
        try:
            self.remove(key)
        except KeyError:
            pass

    def __setitem__(self, key, message):
        """Replace the keyed message; raise KeyError if it doesn't exist."""
        raise NotImplementedError('Method must be implemented by subclass')

    def get(self, key, default=None):
        """Return the keyed message, or default if it doesn't exist."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def __getitem__(self, key):
        """Return the keyed message; raise KeyError if it doesn't exist.

        Without a factory the result comes from get_message(); with one, the
        factory is called on the message's file, which is closed afterwards.
        """
        if not self._factory:
            return self.get_message(key)
        else:
            with contextlib.closing(self.get_file(key)) as file:
                return self._factory(file)

    def get_message(self, key):
        """Return a Message representation or raise a KeyError."""
        raise NotImplementedError('Method must be implemented by subclass')

    def get_string(self, key):
        """Return a string representation or raise a KeyError.

        Uses email.message.Message to create a 7bit clean string
        representation of the message."""
        return email.message_from_bytes(self.get_bytes(key)).as_string()

    def get_bytes(self, key):
        """Return a byte string representation or raise a KeyError."""
        raise NotImplementedError('Method must be implemented by subclass')

    def get_file(self, key):
        """Return a file-like representation or raise a KeyError."""
        raise NotImplementedError('Method must be implemented by subclass')

    def iterkeys(self):
        """Return an iterator over keys."""
        raise NotImplementedError('Method must be implemented by subclass')

    def keys(self):
        """Return a list of keys."""
        return list(self.iterkeys())

    def itervalues(self):
        """Return an iterator over all messages."""
        # Iterate over a snapshot of the keys; a message that disappears
        # before it is fetched is silently skipped.
        for key in self.keys():
            try:
                value = self[key]
            except KeyError:
                continue
            yield value

    def __iter__(self):
        """Iterate over the messages (not the keys) of the mailbox."""
        return self.itervalues()

    def values(self):
        """Return a list of messages. Memory intensive."""
        return list(self.itervalues())

    def iteritems(self):
        """Return an iterator over (key, message) tuples."""
        # Same snapshot-and-skip behaviour as itervalues().
        for key in self.keys():
            try:
                value = self[key]
            except KeyError:
                continue
            yield (key, value)

    def items(self):
        """Return a list of (key, message) tuples. Memory intensive."""
        return list(self.iteritems())

    def __contains__(self, key):
        """Return True if the keyed message exists, False otherwise."""
        raise NotImplementedError('Method must be implemented by subclass')

    def __len__(self):
        """Return a count of messages in the mailbox."""
        raise NotImplementedError('Method must be implemented by subclass')

    def clear(self):
        """Delete all messages."""
        for key in self.keys():
            self.discard(key)

    def pop(self, key, default=None):
        """Delete the keyed message and return it, or default."""
        try:
            result = self[key]
        except KeyError:
            return default
        self.discard(key)
        return result

    def popitem(self):
        """Delete an arbitrary (key, message) pair and return it.

        Raises KeyError if the mailbox is empty.
        """
        for key in self.keys():
            return (key, self.pop(key))     # This is only run once.
        else:
            raise KeyError('No messages in mailbox')

    def update(self, arg=None):
        """Change the messages that correspond to certain keys.

        arg may be a mapping (anything providing items(), or the legacy
        iteritems()), an iterable of (key, message) pairs, or None, in which
        case nothing is changed.  Every pair is attempted; if any key did
        not exist, KeyError is raised after all pairs have been processed.
        """
        if arg is None:
            # Called with no argument: nothing to update.
            return
        if hasattr(arg, 'iteritems'):
            # Call the method that was actually detected; the object may not
            # provide items() at all.
            source = arg.iteritems()
        elif hasattr(arg, 'items'):
            source = arg.items()
        else:
            source = arg
        bad_key = False
        for key, message in source:
            try:
                self[key] = message
            except KeyError:
                bad_key = True
        if bad_key:
            raise KeyError('No message with key(s)')

    def flush(self):
        """Write any pending changes to the disk."""
        raise NotImplementedError('Method must be implemented by subclass')

    def lock(self):
        """Lock the mailbox."""
        raise NotImplementedError('Method must be implemented by subclass')

    def unlock(self):
        """Unlock the mailbox if it is locked."""
        raise NotImplementedError('Method must be implemented by subclass')

    def close(self):
        """Flush and close the mailbox."""
        raise NotImplementedError('Method must be implemented by subclass')

    def _string_to_bytes(self, message):
        """Encode an ASCII-only str as bytes; raise ValueError otherwise."""
        # If a message is not 7bit clean, we refuse to handle it since it
        # likely came from reading invalid messages in text mode, and that way
        # lies mojibake.
        try:
            return message.encode('ascii')
        except UnicodeError:
            raise ValueError("String input must be ASCII-only; "
                "use bytes or a Message instead")

    def _dump_message(self, message, target, mangle_from_=False):
        """Dump message contents to target file.

        message may be an email.message.Message, a str (ASCII only), bytes,
        an io.StringIO (deprecated), or an object with read()/readline().
        Newlines are written using the platform line separator.  When
        mangle_from_ is true, "From " at the start of a line is escaped as
        ">From ".  Raises TypeError for any other message type.
        """
        # This assumes the target file is open in binary mode.
        if isinstance(message, email.message.Message):
            buffer = io.BytesIO()
            gen = email.generator.BytesGenerator(buffer, mangle_from_, 0)
            gen.flatten(message)
            buffer.seek(0)
            data = buffer.read()
            data = data.replace(b'\n', linesep)
            target.write(data)
        elif isinstance(message, (str, bytes, io.StringIO)):
            if isinstance(message, io.StringIO):
                warnings.warn("Use of StringIO input is deprecated, "
                    "use BytesIO instead", DeprecationWarning, 3)
                message = message.getvalue()
            if isinstance(message, str):
                message = self._string_to_bytes(message)
            if mangle_from_:
                message = message.replace(b'\nFrom ', b'\n>From ')
            message = message.replace(b'\n', linesep)
            target.write(message)
        elif hasattr(message, 'read'):
            if hasattr(message, 'buffer'):
                # Text-mode file: fall back to its underlying binary buffer.
                warnings.warn("Use of text mode files is deprecated, "
                    "use a binary mode file instead", DeprecationWarning, 3)
                message = message.buffer
            while True:
                line = message.readline()
                # Universal newline support.
                if line.endswith(b'\r\n'):
                    line = line[:-2] + b'\n'
                elif line.endswith(b'\r'):
                    line = line[:-1] + b'\n'
                if not line:
                    break
                if mangle_from_ and line.startswith(b'From '):
                    line = b'>From ' + line[5:]
                line = line.replace(b'\n', linesep)
                target.write(line)
        else:
            raise TypeError('Invalid message type: %s' % type(message))


class Maildir(Mailbox):
    """A qmail-style Maildir mailbox.

    Each message is a separate file in the 'new' or 'cur' subdirectory;
    files are first written in 'tmp'.  Keys are the unique part of the file
    name, i.e. everything before the first `colon` separator.
    """

    # Separator between the unique name and the info suffix of a file name.
    colon = ':'

    def __init__(self, dirname, factory=None, create=True):
        """Initialize a Maildir instance.

        If dirname does not exist it is created together with its tmp, new
        and cur subdirectories when create is true; otherwise
        NoSuchMailboxError is raised.
        """
        Mailbox.__init__(self, dirname, factory, create)
        self._paths = {
            'tmp': os.path.join(self._path, 'tmp'),
            'new': os.path.join(self._path, 'new'),
            'cur': os.path.join(self._path, 'cur'),
            }
        if not os.path.exists(self._path):
            if create:
                os.mkdir(self._path, 0o700)
                for path in self._paths.values():
                    os.mkdir(path, 0o700)
            else:
                raise NoSuchMailboxError(self._path)
        self._toc = {}              # Maps key -> subpath ('new/...' etc.)
        self._toc_mtimes = {'cur': 0, 'new': 0}
        self._last_read = 0         # Records last time we read cur/new
        self._skewfactor = 0.1      # Adjust if os/fs clocks are skewing

    def add(self, message):
        """Add message and return assigned key.

        The message is written to a uniquely named file in tmp and then
        linked (or renamed, where os.link is unavailable) into its
        destination: the subdirectory and info of a MaildirMessage, or 'new'
        with no info otherwise.  Raises ExternalClashError if the
        destination name already exists.
        """
        tmp_file = self._create_tmp()
        try:
            self._dump_message(message, tmp_file)
        except BaseException:
            # Do not leave a partial file behind in tmp.
            tmp_file.close()
            os.remove(tmp_file.name)
            raise
        _sync_close(tmp_file)
        if isinstance(message, MaildirMessage):
            subdir = message.get_subdir()
            suffix = self.colon + message.get_info()
            if suffix == self.colon:
                suffix = ''
        else:
            subdir = 'new'
            suffix = ''
        uniq = os.path.basename(tmp_file.name).split(self.colon)[0]
        dest = os.path.join(self._path, subdir, uniq + suffix)
        try:
            if hasattr(os, 'link'):
                os.link(tmp_file.name, dest)
                os.remove(tmp_file.name)
            else:
                os.rename(tmp_file.name, dest)
        except OSError as e:
            os.remove(tmp_file.name)
            if e.errno == errno.EEXIST:
                raise ExternalClashError('Name clash with existing message: %s'
                                         % dest)
            else:
                raise
        if isinstance(message, MaildirMessage):
            # Carry the message's date over as the file's mtime.
            os.utime(dest, (os.path.getatime(dest), message.get_date()))
        return uniq

    def remove(self, key):
        """Remove the keyed message; raise KeyError if it doesn't exist."""
        os.remove(os.path.join(self._path, self._lookup(key)))

    def discard(self, key):
        """If the keyed message exists, remove it."""
        # This overrides an inapplicable implementation in the superclass.
        try:
            self.remove(key)
        except KeyError:
            pass
        except OSError as e:
            # The file vanishing between lookup and removal is not an error.
            if e.errno != errno.ENOENT:
                raise

    def __setitem__(self, key, message):
        """Replace the keyed message; raise KeyError if it doesn't exist.

        The new message is first added under a temporary key and then
        renamed over the old key.  Subdirectory and info come from message
        if it is a MaildirMessage, and from the replaced file otherwise.
        """
        old_subpath = self._lookup(key)
        temp_key = self.add(message)
        temp_subpath = self._lookup(temp_key)
        if isinstance(message, MaildirMessage):
            # temp's subdir and suffix were specified by message.
            dominant_subpath = temp_subpath
        else:
            # temp's subdir and suffix were defaults from add().
            dominant_subpath = old_subpath
        subdir = os.path.dirname(dominant_subpath)
        if self.colon in dominant_subpath:
            suffix = self.colon + dominant_subpath.split(self.colon)[-1]
        else:
            suffix = ''
        self.discard(key)
        new_path = os.path.join(self._path, subdir, key + suffix)
        os.rename(os.path.join(self._path, temp_subpath), new_path)
        if isinstance(message, MaildirMessage):
            os.utime(new_path, (os.path.getatime(new_path),
                                message.get_date()))

    def get_message(self, key):
        """Return a Message representation or raise a KeyError.

        The message's subdir, info and date are set from the file's
        location, name suffix and mtime respectively.
        """
        subpath = self._lookup(key)
        with open(os.path.join(self._path, subpath), 'rb') as f:
            if self._factory:
                msg = self._factory(f)
            else:
                msg = MaildirMessage(f)
        subdir, name = os.path.split(subpath)
        msg.set_subdir(subdir)
        if self.colon in name:
            msg.set_info(name.split(self.colon)[-1])
        msg.set_date(os.path.getmtime(os.path.join(self._path, subpath)))
        return msg

    def get_bytes(self, key):
        """Return a bytes representation or raise a KeyError."""
        with open(os.path.join(self._path, self._lookup(key)), 'rb') as f:
            return f.read().replace(linesep, b'\n')

    def get_file(self, key):
        """Return a file-like representation or raise a KeyError."""
        f = open(os.path.join(self._path, self._lookup(key)), 'rb')
        return _ProxyFile(f)

    def iterkeys(self):
        """Return an iterator over keys."""
        self._refresh()
        for key in self._toc:
            # Skip keys whose lookup fails at the time they are visited.
            try:
                self._lookup(key)
            except KeyError:
                continue
            yield key

    def __contains__(self, key):
        """Return True if the keyed message exists, False otherwise."""
        self._refresh()
        return key in self._toc

    def __len__(self):
        """Return a count of messages in the mailbox."""
        self._refresh()
        return len(self._toc)

    def flush(self):
        """Write any pending changes to disk."""
        # Maildir changes are always written immediately, so there's nothing
        # to do.
        pass

    def lock(self):
        """Lock the mailbox (a no-op in this class)."""
        return

    def unlock(self):
        """Unlock the mailbox if it is locked (a no-op in this class)."""
        return

    def close(self):
        """Flush and close the mailbox (a no-op in this class)."""
        return

    def list_folders(self):
        """Return a list of folder names.

        Folders are the subdirectories of the mailbox whose names start
        with '.'; the leading dot is stripped from the returned names.
        """
        result = []
        for entry in os.listdir(self._path):
            if len(entry) > 1 and entry[0] == '.' and \
               os.path.isdir(os.path.join(self._path, entry)):
                result.append(entry[1:])
        return result

    def get_folder(self, folder):
        """Return a Maildir instance for the named folder."""
        return Maildir(os.path.join(self._path, '.' + folder),
                       factory=self._factory,
                       create=False)

    def add_folder(self, folder):
        """Create a folder and return a Maildir instance representing it."""
        path = os.path.join(self._path, '.' + folder)
        result = Maildir(path, factory=self._factory)
        # Create an empty 'maildirfolder' marker file inside the folder if
        # it is not there yet.
        maildirfolder_path = os.path.join(path, 'maildirfolder')
        if not os.path.exists(maildirfolder_path):
            os.close(os.open(maildirfolder_path, os.O_CREAT | os.O_WRONLY,
                0o666))
        return result

    def remove_folder(self, folder):
        """Delete the named folder, which must be empty.

        Raises NotEmptyError if the folder's new or cur directory contains
        an entry not starting with '.', or if the folder contains a
        subdirectory other than new, cur and tmp.
        """
        path = os.path.join(self._path, '.' + folder)
        for entry in os.listdir(os.path.join(path, 'new')) + \
                     os.listdir(os.path.join(path, 'cur')):
            if len(entry) < 1 or entry[0] != '.':
                raise NotEmptyError('Folder contains message(s): %s' % folder)
        for entry in os.listdir(path):
            if entry != 'new' and entry != 'cur' and entry != 'tmp' and \
               os.path.isdir(os.path.join(path, entry)):
                # The quoted name is the offending subdirectory; the folder
                # name follows the colon, as in the message above.
                raise NotEmptyError("Folder contains subdirectory '%s': %s" %
                                    (entry, folder))
        # Bottom-up walk so that directories are empty when removed.
        for root, dirs, files in os.walk(path, topdown=False):
            for entry in files:
                os.remove(os.path.join(root, entry))
            for entry in dirs:
                os.rmdir(os.path.join(root, entry))
        os.rmdir(path)

    def clean(self):
        """Delete old files in "tmp".

        A file is removed when its access time is more than 36 hours old.
        """
        now = time.time()
        for entry in os.listdir(os.path.join(self._path, 'tmp')):
            path = os.path.join(self._path, 'tmp', entry)
            if now - os.path.getatime(path) > 129600:   # 60 * 60 * 36
                os.remove(path)

    _count = 1  # This is used to generate unique file names.

    def _create_tmp(self):
        """Create a file in the tmp subdirectory and open and return it.

        The name is built from the current time, the process id, a
        class-wide counter and the host name.  Raises ExternalClashError if
        a file with that name already exists.
        """
        now = time.time()
        hostname = socket.gethostname()
        # '/' and ':' are not allowed in the unique name; escape them.
        if '/' in hostname:
            hostname = hostname.replace('/', r'\057')
        if ':' in hostname:
            hostname = hostname.replace(':', r'\072')
        uniq = "%s.M%sP%sQ%s.%s" % (int(now), int(now % 1 * 1e6), os.getpid(),
                                    Maildir._count, hostname)
        path = os.path.join(self._path, 'tmp', uniq)
        try:
            os.stat(path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                Maildir._count += 1
                try:
                    return _create_carefully(path)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
            else:
                raise

        # Fall through to here if stat succeeded or open raised EEXIST.
        raise ExternalClashError('Name clash prevented file creation: %s' %
                                 path)

    def _refresh(self):
        """Update table of contents mapping."""
        # If it has been less than two seconds since the last _refresh() call,
        # we have to unconditionally re-read the mailbox just in case it has
        # been modified, because os.path.getmtime() has a 2 sec resolution in
        # the most common worst case (FAT) and a 1 sec resolution typically.
        # This results in a few unnecessary re-reads when _refresh() is called
        # multiple times in that interval, but once the clock ticks over, we
        # will only re-read as needed.  Because the filesystem might be being
        # served by an independent system with its own clock, we record and
        # compare with the mtimes from the filesystem.  Because the other
        # system's clock might be skewing relative to our clock, we add an
        # extra delta to our wait.  The default is one tenth second, but is an
        # instance variable and so can be adjusted if dealing with a
        # particularly skewed or irregular system.
        if time.time() - self._last_read > 2 + self._skewfactor:
            refresh = False
            for subdir in self._toc_mtimes:
                mtime = os.path.getmtime(self._paths[subdir])
                if mtime > self._toc_mtimes[subdir]:
                    refresh = True
                self._toc_mtimes[subdir] = mtime
            if not refresh:
                return
        # Refresh toc
        self._toc = {}
        for subdir in self._toc_mtimes:
            path = self._paths[subdir]
            for entry in os.listdir(path):
                p = os.path.join(path, entry)
                if os.path.isdir(p):
                    continue
                uniq = entry.split(self.colon)[0]
                self._toc[uniq] = os.path.join(subdir, entry)
        self._last_read = time.time()

    def _lookup(self, key):
        """Use TOC to return subpath for given key, or raise a KeyError."""
        # Fast path: trust the cached entry if its file still exists.
        try:
            if os.path.exists(os.path.join(self._path, self._toc[key])):
                return self._toc[key]
        except KeyError:
            pass
        self._refresh()
        try:
            return self._toc[key]
        except KeyError:
            raise KeyError('No message with key: %s' % key)

    # This method is for backward compatibility only.
    def next(self):
        """Return the next message in a one-time iteration.

        Returns None once the keys captured on the first call are
        exhausted; keys whose message has disappeared are skipped.
        """
        if not hasattr(self, '_onetime_keys'):
            self._onetime_keys = iter(self.keys())
        while True:
            try:
                return self[next(self._onetime_keys)]
            except StopIteration:
                return None
            except KeyError:
                continue


class _singlefileMailbox(Mailbox):
    """A single-file mailbox.

    Messages are located through a table of contents mapping each key to
    its (start, stop) byte offsets in the file.  Subclasses provide
    _generate_toc() and _install_message().
    """

    def __init__(self, path, factory=None, create=True):
        """Initialize a single-file mailbox.

        The file is opened for reading and writing.  If it does not exist
        it is created when create is true, otherwise NoSuchMailboxError is
        raised.  If it cannot be opened for writing (EACCES or EROFS) it is
        opened read-only instead.
        """
        Mailbox.__init__(self, path, factory, create)
        try:
            f = open(self._path, 'rb+')
        except IOError as e:
            if e.errno == errno.ENOENT:
                if create:
                    f = open(self._path, 'wb+')
                else:
                    raise NoSuchMailboxError(self._path)
            elif e.errno in (errno.EACCES, errno.EROFS):
                f = open(self._path, 'rb')
            else:
                raise
        self._file = f
        self._toc = None        # Built lazily by _lookup().
        self._next_key = 0
        self._pending = False   # No changes require rewriting the file.
        self._locked = False
        self._file_length = None        # Used to record mailbox size

    def add(self, message):
        """Add message and return assigned key."""
        self._lookup()
        self._toc[self._next_key] = self._append_message(message)
        self._next_key += 1
        self._pending = True
        return self._next_key - 1

    def remove(self, key):
        """Remove the keyed message; raise KeyError if it doesn't exist."""
        self._lookup(key)
        del self._toc[key]
        self._pending = True

    def __setitem__(self, key, message):
        """Replace the keyed message; raise KeyError if it doesn't exist."""
        self._lookup(key)
        # The replacement is appended; the old data is dropped on flush().
        self._toc[key] = self._append_message(message)
        self._pending = True

    def iterkeys(self):
        """Return an iterator over keys."""
        self._lookup()
        for key in self._toc.keys():
            yield key

    def __contains__(self, key):
        """Return True if the keyed message exists, False otherwise."""
        self._lookup()
        return key in self._toc

    def __len__(self):
        """Return a count of messages in the mailbox."""
        self._lookup()
        return len(self._toc)

    def lock(self):
        """Lock the mailbox."""
        if not self._locked:
            _lock_file(self._file)
            self._locked = True

    def unlock(self):
        """Unlock the mailbox if it is locked."""
        if self._locked:
            _unlock_file(self._file)
            self._locked = False

    def flush(self):
        """Write any pending changes to disk.

        The messages listed in the table of contents are copied, in key
        order, to a temporary file which then replaces the mailbox file.
        Raises ExternalClashError if the mailbox file's size differs from
        the size recorded when it was last scanned or appended to.
        """
        if not self._pending:
            return

        # In order to be writing anything out at all, self._toc must
        # already have been generated (and presumably has been modified
        # by adding or deleting an item).
        assert self._toc is not None

        # Check length of self._file; if it's changed, some other process
        # has modified the mailbox since we scanned it.
        self._file.seek(0, 2)
        cur_len = self._file.tell()
        if cur_len != self._file_length:
            raise ExternalClashError('Size of mailbox file changed '
                                     '(expected %i, found %i)' %
                                     (self._file_length, cur_len))

        new_file = _create_temporary(self._path)
        try:
            new_toc = {}
            self._pre_mailbox_hook(new_file)
            for key in sorted(self._toc.keys()):
                start, stop = self._toc[key]
                self._file.seek(start)
                self._pre_message_hook(new_file)
                new_start = new_file.tell()
                while True:
                    buffer = self._file.read(min(4096,
                                                 stop - self._file.tell()))
                    if not buffer:
                        break
                    new_file.write(buffer)
                new_toc[key] = (new_start, new_file.tell())
                self._post_message_hook(new_file)
            # Size of the rewritten mailbox, recorded below once it has
            # actually replaced the old file.
            new_length = new_file.tell()
        except BaseException:
            new_file.close()
            os.remove(new_file.name)
            raise
        _sync_close(new_file)
        # self._file is about to get replaced, so no need to sync.
        self._file.close()
        try:
            os.rename(new_file.name, self._path)
        except OSError as e:
            if e.errno == errno.EEXIST or \
              (os.name == 'os2' and e.errno == errno.EACCES):
                os.remove(self._path)
                os.rename(new_file.name, self._path)
            else:
                raise
        self._file = open(self._path, 'rb+')
        self._toc = new_toc
        # Keep the recorded size in step with the rewritten file; otherwise
        # the size check above would wrongly fail on the next flush().
        self._file_length = new_length
        self._pending = False
        if self._locked:
            _lock_file(self._file, dotlock=False)

    def _pre_mailbox_hook(self, f):
        """Called before writing the mailbox to file f."""
        return

    def _pre_message_hook(self, f):
        """Called before writing each message to file f."""
        return

    def _post_message_hook(self, f):
        """Called after writing each message to file f."""
        return

    def close(self):
        """Flush and close the mailbox."""
        self.flush()
        if self._locked:
            self.unlock()
        self._file.close()  # Sync has been done by self.flush() above.

    def _lookup(self, key=None):
        """Return (start, stop) or raise KeyError.

        Generates the table of contents on first use.  With key omitted,
        only that initialization is performed and None is returned.
        """
        if self._toc is None:
            self._generate_toc()
        if key is not None:
            try:
                return self._toc[key]
            except KeyError:
                raise KeyError('No message with key: %s' % key)

    def _append_message(self, message):
        """Append message to mailbox and return (start, stop) offsets."""
        self._file.seek(0, 2)
        before = self._file.tell()
        try:
            self._pre_message_hook(self._file)
            offsets = self._install_message(message)
            self._post_message_hook(self._file)
        except BaseException:
            # Undo a partial write so the file is left as it was.
            self._file.truncate(before)
            raise
        self._file.flush()
        self._file_length = self._file.tell()  # Record current length of mailbox
        return offsets



class _mboxMMDF(_singlefileMailbox):
    """Behaviour shared by the mbox and MMDF single-file formats."""

    _mangle_from_ = True

    def get_message(self, key):
        """Return the keyed message as a Message object; KeyError if absent.

        The object is built by self._message_factory and its "From " line
        content (without the leading "From ") is set via set_from().
        """
        begin, end = self._lookup(key)
        mailbox = self._file
        mailbox.seek(begin)
        envelope = mailbox.readline().replace(linesep, b'')
        body = mailbox.read(end - mailbox.tell())
        message = self._message_factory(body.replace(linesep, b'\n'))
        message.set_from(envelope[5:].decode('ascii'))
        return message

    def get_string(self, key, from_=False):
        """Return the keyed message as a str; KeyError if absent.

        from_ is passed to as_string() as its unixfrom argument.
        """
        raw = self.get_bytes(key)
        parsed = email.message_from_bytes(raw)
        return parsed.as_string(unixfrom=from_)

    def get_bytes(self, key, from_=False):
        """Return the keyed message as bytes; KeyError if absent.

        The leading "From " line is included only when from_ is true.
        """
        begin, end = self._lookup(key)
        mailbox = self._file
        mailbox.seek(begin)
        if not from_:
            # Skip over the "From " line.
            mailbox.readline()
        raw = mailbox.read(end - mailbox.tell())
        return raw.replace(linesep, b'\n')

    def get_file(self, key, from_=False):
        """Return a file-like view of the keyed message; KeyError if absent.

        The leading "From " line is included only when from_ is true.
        """
        begin, end = self._lookup(key)
        mailbox = self._file
        mailbox.seek(begin)
        if not from_:
            # Skip over the "From " line.
            mailbox.readline()
        return _PartialFile(mailbox, mailbox.tell(), end)

    def _install_message(self, message):
        """Write message at the current position of self._file.

        Returns the (start, stop) offsets of what was written.  The
        "From " line is taken from the message itself where available and
        otherwise synthesized for MAILER-DAEMON with the current UTC time.
        """
        envelope = None
        if isinstance(message, str):
            message = self._string_to_bytes(message)
        if isinstance(message, bytes) and message.startswith(b'From '):
            # Split off the first line; with no newline present the whole
            # input is the "From " line and the remaining message is empty.
            envelope, _, message = message.partition(b'\n')
        elif isinstance(message, _mboxMMDFMessage):
            envelope = b'From ' + message.get_from().encode('ascii')
        elif isinstance(message, email.message.Message):
            unixfrom = message.get_unixfrom()  # May be None.
            if unixfrom is not None:
                envelope = unixfrom.encode('ascii')
        if envelope is None:
            timestamp = time.asctime(time.gmtime())
            envelope = b'From MAILER-DAEMON ' + timestamp.encode()
        mailbox = self._file
        start = mailbox.tell()
        mailbox.write(envelope + linesep)
        self._dump_message(message, mailbox, self._mangle_from_)
        return (start, mailbox.tell())


class mbox(_mboxMMDF):
    """A classic mbox mailbox."""

    # Passed to _dump_message() by _mboxMMDF._install_message().
    _mangle_from_ = True

    def __init__(self, path, factory=None, create=True):
        """Initialize an mbox mailbox."""
        self._message_factory = mboxMessage
        _mboxMMDF.__init__(self, path, factory, create)

    def _pre_message_hook(self, f):
        """Called before writing each message to file f.

        Writes a separating line ending before every message except one
        that starts at offset 0.
        """
        if f.tell() != 0:
            f.write(linesep)

    def _generate_toc(self):
        """Generate key-to-(start, stop) table of contents.

        Scans the whole file for lines starting with b'From ' and records
        the offsets of each message in self._toc; also resets
        self._next_key and self._file_length.
        """
        starts, stops = [], []
        self._file.seek(0)
        while True:
            line_pos = self._file.tell()
            line = self._file.readline()
            if line.startswith(b'From '):
                if len(stops) < len(starts):
                    # Close the previous message, excluding the separator
                    # written by _pre_message_hook().
                    stops.append(line_pos - len(linesep))
                starts.append(line_pos)
            elif not line:
                # End of file: the last message runs to here.
                stops.append(line_pos)
                break
        self._toc = dict(enumerate(zip(starts, stops)))
        self._next_key = len(self._toc)
        self._file_length = self._file.tell()


class MMDF(_mboxMMDF):
    """An MMDF mailbox."""

    def __init__(self, path, factory=None, create=True):
        """Initialize an MMDF mailbox."""
        self._message_factory = MMDFMessage
        _mboxMMDF.__init__(self, path, factory, create)

    def _pre_message_hook(self, f):
        """Called before writing each message to file f."""
        f.write(b'\001\001\001\001' + linesep)

    def _post_message_hook(self, f):
        """Called after writing each message to file f."""
        f.write(linesep + b'\001\001\001\001' + linesep)

    def _generate_toc(self):
        """Generate key-to-(start, stop) table of contents.

        Scans the whole file for delimiter lines of four \\001 bytes and
        records the offsets of the text enclosed by each pair in
        self._toc; also resets self._next_key and self._file_length.
        """
        delimiter = b'\001\001\001\001' + linesep
        starts, stops = [], []
        self._file.seek(0)
        next_pos = 0
        while True:
            line_pos = next_pos
            line = self._file.readline()
            next_pos = self._file.tell()
            if line.startswith(delimiter):
                # Opening delimiter: the message begins on the next line.
                starts.append(next_pos)
                while True:
                    line_pos = next_pos
                    line = self._file.readline()
                    next_pos = self._file.tell()
                    if line == delimiter:
                        # Closing delimiter: exclude the line ending that
                        # _post_message_hook() writes in front of it.
                        stops.append(line_pos - len(linesep))
                        break
                    elif not line:
                        # File ended without a closing delimiter.
                        stops.append(line_pos)
                        break
            elif not line:
                break
        self._toc = dict(enumerate(zip(starts, stops)))
        self._next_key = len(self._toc)
        self._file.seek(0, 2)
        self._file_length = self._file.tell()


class MH(Mailbox):
    """An MH mailbox.

    Each message is a file named by its integer key inside the mailbox
    directory; sequences are kept in the '.mh_sequences' file there.
    """

    def __init__(self, path, factory=None, create=True):
        """Initialize an MH instance.

        When the directory is missing it is created together with an empty
        '.mh_sequences' file if create is true; otherwise
        NoSuchMailboxError is raised.
        """
        Mailbox.__init__(self, path, factory, create)
        if not os.path.exists(self._path):
            if create:
                os.mkdir(self._path, 0o700)
                os.close(os.open(os.path.join(self._path, '.mh_sequences'),
                                 os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600))
            else:
                raise NoSuchMailboxError(self._path)
        self._locked = False

    def add(self, message):
        """Add message and return assigned key.

        The new key is one more than the largest existing key (1 for an
        empty mailbox).  If writing the message fails, the partially
        written file is removed and the exception propagates.
        """
        keys = self.keys()
        if len(keys) == 0:
            new_key = 1
        else:
            new_key = max(keys) + 1
        new_path = os.path.join(self._path, str(new_key))
        f = _create_carefully(new_path)
        closed = False
        try:
            if self._locked:
                _lock_file(f)
            try:
                try:
                    self._dump_message(message, f)
                except BaseException:
                    # Unlock and close so it can be deleted on Windows
                    if self._locked:
                        _unlock_file(f)
                    _sync_close(f)
                    closed = True
                    os.remove(new_path)
                    raise
                if isinstance(message, MHMessage):
                    self._dump_sequences(message, new_key)
            finally:
                # Skip the unlock when the failure path above has already
                # unlocked and closed f; unlocking a closed file here could
                # replace the original exception with a new one.
                if self._locked and not closed:
                    _unlock_file(f)
        finally:
            if not closed:
                _sync_close(f)
        return new_key

    def remove(self, key):
        """Remove the keyed message; raise KeyError if it doesn't exist."""
        path = os.path.join(self._path, str(key))
        try:
            # Opening for update checks that the file exists and is
            # writable before it is deleted.
            f = open(path, 'rb+')
        except IOError as e:
            if e.errno == errno.ENOENT:
                raise KeyError('No message with key: %s' % key)
            else:
                raise
        else:
            f.close()
            os.remove(path)

    def __setitem__(self, key, message):
        """Replace the keyed message; raise KeyError if it doesn't exist."""
        path = os.path.join(self._path, str(key))
        try:
            f = open(path, 'rb+')
        except IOError as e:
            if e.errno == errno.ENOENT:
                raise KeyError('No message with key: %s' % key)
            else:
                raise
        try:
            if self._locked:
                _lock_file(f)
            try:
                # Truncate the existing file before writing the new content.
                os.close(os.open(path, os.O_WRONLY | os.O_TRUNC))
                self._dump_message(message, f)
                if isinstance(message, MHMessage):
                    self._dump_sequences(message, key)
            finally:
                if self._locked:
                    _unlock_file(f)
        finally:
            _sync_close(f)

    def get_message(self, key):
        """Return a Message representation or raise a KeyError.

        The returned MHMessage carries every sequence that lists key.
        """
        try:
            if self._locked:
                f = open(os.path.join(self._path, str(key)), 'rb+')
            else:
                f = open(os.path.join(self._path, str(key)), 'rb')
        except IOError as e:
            if e.errno == errno.ENOENT:
                raise KeyError('No message with key: %s' % key)
            else:
                raise
        try:
            if self._locked:
                _lock_file(f)
            try:
                msg = MHMessage(f)
            finally:
                if self._locked:
                    _unlock_file(f)
        finally:
            f.close()
        for name, key_list in self.get_sequences().items():
            if key in key_list:
                msg.add_sequence(name)
        return msg

    def get_bytes(self, key):
        """Return a bytes representation or raise a KeyError.

        Platform line endings are converted to b'\\n'.
        """
        try:
            if self._locked:
                f = open(os.path.join(self._path, str(key)), 'rb+')
            else:
                f = open(os.path.join(self._path, str(key)), 'rb')
        except IOError as e:
            if e.errno == errno.ENOENT:
                raise KeyError('No message with key: %s' % key)
            else:
                raise
        try:
            if self._locked:
                _lock_file(f)
            try:
                return f.read().replace(linesep, b'\n')
            finally:
                if self._locked:
                    _unlock_file(f)
        finally:
            f.close()

    def get_file(self, key):
        """Return a file-like representation or raise a KeyError."""
        try:
            f = open(os.path.join(self._path, str(key)), 'rb')
        except IOError as e:
            if e.errno == errno.ENOENT:
                raise KeyError('No message with key: %s' % key)
            else:
                raise
        return _ProxyFile(f)

    def iterkeys(self):
        """Return an iterator over keys.

        Keys are the all-digit directory entries, as ints in ascending
        order.
        """
        return iter(sorted(int(entry) for entry in os.listdir(self._path)
                                      if entry.isdigit()))

    def __contains__(self, key):
        """Return True if the keyed message exists, False otherwise."""
        return os.path.exists(os.path.join(self._path, str(key)))

    def __len__(self):
        """Return a count of messages in the mailbox."""
        return len(list(self.keys()))

    def lock(self):
        """Lock the mailbox.

        The lock is taken on the '.mh_sequences' file, which stays open
        until unlock() is called.
        """
        if not self._locked:
            self._file = open(os.path.join(self._path, '.mh_sequences'), 'rb+')
            try:
                _lock_file(self._file)
            except BaseException:
                # Don't leak the open file when the lock can't be acquired.
                self._file.close()
                del self._file
                raise
            self._locked = True

    def unlock(self):
        """Unlock the mailbox if it is locked."""
        if self._locked:
            _unlock_file(self._file)
            _sync_close(self._file)
            del self._file
            self._locked = False

    def flush(self):
        """Write any pending changes to the disk.

        Nothing is buffered by this class, so this is a no-op.
        """
        return

    def close(self):
        """Flush and close the mailbox."""
        if self._locked:
            self.unlock()

    def list_folders(self):
        """Return a list of folder names."""
        result = []
        for entry in os.listdir(self._path):
            if os.path.isdir(os.path.join(self._path, entry)):
                result.append(entry)
        return result

    def get_folder(self, folder):
        """Return an MH instance for the named folder."""
        return MH(os.path.join(self._path, folder),
                  factory=self._factory, create=False)

    def add_folder(self, folder):
        """Create a folder and return an MH instance representing it."""
        return MH(os.path.join(self._path, folder),
                  factory=self._factory)

    def remove_folder(self, folder):
        """Delete the named folder, which must be empty.

        A folder containing only '.mh_sequences' counts as empty.
        NotEmptyError is raised for anything else.
        """
        path = os.path.join(self._path, folder)
        entries = os.listdir(path)
        if entries == ['.mh_sequences']:
            os.remove(os.path.join(path, '.mh_sequences'))
        elif entries == []:
            pass
        else:
            # Report the folder that is not empty, not the parent mailbox.
            raise NotEmptyError('Folder not empty: %s' % path)
        os.rmdir(path)

    def get_sequences(self):
        """Return a name-to-key-list dictionary to define each sequence.

        Keys with no corresponding message are dropped, and sequences
        left empty are omitted.  FormatError is raised for a line that
        cannot be parsed.
        """
        results = {}
        with open(os.path.join(self._path, '.mh_sequences'), 'r', encoding='ASCII') as f:
            all_keys = set(self.keys())
            for line in f:
                try:
                    name, contents = line.split(':')
                    keys = set()
                    for spec in contents.split():
                        if spec.isdigit():
                            keys.add(int(spec))
                        else:
                            # A "start-stop" range, inclusive on both ends.
                            start, stop = (int(x) for x in spec.split('-'))
                            keys.update(range(start, stop + 1))
                    results[name] = [key for key in sorted(keys) \
                                         if key in all_keys]
                    if len(results[name]) == 0:
                        del results[name]
                except ValueError:
                    raise FormatError('Invalid sequence specification: %s' %
                                      line.rstrip())
        return results

    def set_sequences(self, sequences):
        """Set sequences using the given name-to-key-list dictionary.

        Runs of consecutive keys are written as "first-last" ranges;
        sequences with no keys are skipped.
        """
        f = open(os.path.join(self._path, '.mh_sequences'), 'r+', encoding='ASCII')
        try:
            # Truncate the existing file before rewriting it.
            os.close(os.open(f.name, os.O_WRONLY | os.O_TRUNC))
            for name, keys in sequences.items():
                if len(keys) == 0:
                    continue
                f.write(name + ':')
                prev = None
                completing = False      # True while inside a "first-" range.
                for key in sorted(set(keys)):
                    if key - 1 == prev:
                        if not completing:
                            completing = True
                            f.write('-')
                    elif completing:
                        completing = False
                        f.write('%s %s' % (prev, key))
                    else:
                        f.write(' %s' % key)
                    prev = key
                if completing:
                    f.write(str(prev) + '\n')
                else:
                    f.write('\n')
        finally:
            _sync_close(f)

    def pack(self):
        """Re-name messages to eliminate numbering gaps. Invalidates keys."""
        sequences = self.get_sequences()
        prev = 0
        changes = []
        for key in self.keys():
            if key - 1 != prev:
                changes.append((key, prev + 1))
                if hasattr(os, 'link'):
                    os.link(os.path.join(self._path, str(key)),
                            os.path.join(self._path, str(prev + 1)))
                    os.unlink(os.path.join(self._path, str(key)))
                else:
                    os.rename(os.path.join(self._path, str(key)),
                              os.path.join(self._path, str(prev + 1)))
            prev += 1
        self._next_key = prev + 1
        if len(changes) == 0:
            return
        # Rewrite the sequences so they refer to the new keys.
        for name, key_list in sequences.items():
            for old, new in changes:
                if old in key_list:
                    key_list[key_list.index(old)] = new
        self.set_sequences(sequences)

    def _dump_sequences(self, message, key):
        """Inspect a new MHMessage and update sequences appropriately.

        key is added to every sequence the message belongs to and removed
        from every other sequence that currently lists it.
        """
        pending_sequences = message.get_sequences()
        all_sequences = self.get_sequences()
        for name, key_list in all_sequences.items():
            if name in pending_sequences:
                key_list.append(key)
            elif key in key_list:
                del key_list[key_list.index(key)]
        for sequence in pending_sequences:
            if sequence not in all_sequences:
                all_sequences[sequence] = [key]
        self.set_sequences(all_sequences)


class Babyl(_singlefileMailbox):
    """An Rmail-style Babyl mailbox."""

    _special_labels = frozenset(('unseen', 'deleted', 'filed', 'answered',
                                 'forwarded', 'edited', 'resent'))

    def __init__(self, path, factory=None, create=True):
        """Initialize a Babyl mailbox."""
        _singlefileMailbox.__init__(self, path, factory, create)
        self._labels = {}       # Maps message key to its list of labels.

    def add(self, message):
        """Add message and return assigned key."""
        key = _singlefileMailbox.add(self, message)
        if isinstance(message, BabylMessage):
            self._labels[key] = message.get_labels()
        return key

    def remove(self, key):
        """Remove the keyed message; raise KeyError if it doesn't exist."""
        _singlefileMailbox.remove(self, key)
        if key in self._labels:
            del self._labels[key]

    def __setitem__(self, key, message):
        """Replace the keyed message; raise KeyError if it doesn't exist."""
        _singlefileMailbox.__setitem__(self, key, message)
        if isinstance(message, BabylMessage):
            self._labels[key] = message.get_labels()

    def get_message(self, key):
        """Return a Message representation or raise a KeyError.

        The returned BabylMessage is built from the original headers and
        the body; the visible headers and any recorded labels are attached
        to it.
        """
        start, stop = self._lookup(key)
        self._file.seek(start)
        self._file.readline()   # Skip b'1,' line specifying labels.
        original_headers = io.BytesIO()
        while True:
            line = self._file.readline()
            if line == b'*** EOOH ***' + linesep or not line:
                break
            original_headers.write(line.replace(linesep, b'\n'))
        visible_headers = io.BytesIO()
        while True:
            line = self._file.readline()
            if line == linesep or not line:
                break
            visible_headers.write(line.replace(linesep, b'\n'))
        # Read up to the stop, or to the end
        n = stop - self._file.tell()
        assert n >= 0
        body = self._file.read(n)
        body = body.replace(linesep, b'\n')
        msg = BabylMessage(original_headers.getvalue() + body)
        msg.set_visible(visible_headers.getvalue())
        if key in self._labels:
            msg.set_labels(self._labels[key])
        return msg

    def get_bytes(self, key):
        """Return a bytes representation or raise a KeyError.

        The result is the original headers followed by the body, with
        platform line endings converted to b'\\n'; the visible headers
        are skipped.
        """
        start, stop = self._lookup(key)
        self._file.seek(start)
        self._file.readline()   # Skip b'1,' line specifying labels.
        original_headers = io.BytesIO()
        while True:
            line = self._file.readline()
            if line == b'*** EOOH ***' + linesep or not line:
                break
            original_headers.write(line.replace(linesep, b'\n'))
        # Skip over the visible headers.
        while True:
            line = self._file.readline()
            if line == linesep or not line:
                break
        headers = original_headers.getvalue()
        n = stop - self._file.tell()
        assert n >= 0
        data = self._file.read(n)
        data = data.replace(linesep, b'\n')
        return headers + data

    def get_file(self, key):
        """Return a file-like representation or raise a KeyError."""
        return io.BytesIO(self.get_bytes(key).replace(b'\n', linesep))

    def get_labels(self):
        """Return a list of user-defined labels in the mailbox."""
        self._lookup()
        labels = set()
        for label_list in self._labels.values():
            labels.update(label_list)
        labels.difference_update(self._special_labels)
        return list(labels)

    def _generate_toc(self):
        """Generate key-to-(start, stop) table of contents.

        Also rebuilds self._labels from the label line that follows each
        message delimiter, and resets self._next_key and
        self._file_length.
        """
        starts, stops = [], []
        self._file.seek(0)
        next_pos = 0
        label_lists = []
        while True:
            line_pos = next_pos
            line = self._file.readline()
            next_pos = self._file.tell()
            if line == b'\037\014' + linesep:
                # Start of a message; it also terminates the previous one.
                if len(stops) < len(starts):
                    stops.append(line_pos - len(linesep))
                starts.append(next_pos)
                labels = [label.strip() for label
                                        in self._file.readline()[1:].split(b',')
                                        if label.strip()]
                label_lists.append(labels)
            elif line == b'\037' or line == b'\037' + linesep:
                # Terminator of the last message.
                if len(stops) < len(starts):
                    stops.append(line_pos - len(linesep))
            elif not line:
                stops.append(line_pos - len(linesep))
                break
        self._toc = dict(enumerate(zip(starts, stops)))
        self._labels = dict(enumerate(label_lists))
        self._next_key = len(self._toc)
        self._file.seek(0, 2)
        self._file_length = self._file.tell()

    def _pre_mailbox_hook(self, f):
        """Called before writing the mailbox to file f."""
        babyl = b'BABYL OPTIONS:' + linesep
        babyl += b'Version: 5' + linesep
        labels = self.get_labels()
        labels = (label.encode() for label in labels)
        babyl += b'Labels:' + b','.join(labels) + linesep
        babyl += b'\037'
        f.write(babyl)

    def _pre_message_hook(self, f):
        """Called before writing each message to file f."""
        f.write(b'\014' + linesep)

    def _post_message_hook(self, f):
        """Called after writing each message to file f."""
        f.write(linesep + b'\037')

    def _install_message(self, message):
        """Write message contents and return (start, stop).

        The layout written is: a label line, the original headers, a
        b'*** EOOH ***' line, the visible headers, and the body.  message
        may be an email message, bytes, str, or a file-like object with a
        readline() method; TypeError is raised for anything else.
        """
        start = self._file.tell()
        if isinstance(message, BabylMessage):
            special_labels = []
            labels = []
            for label in message.get_labels():
                if label in self._special_labels:
                    special_labels.append(label)
                else:
                    labels.append(label)
            self._file.write(b'1')
            for label in special_labels:
                self._file.write(b', ' + label.encode())
            self._file.write(b',,')
            for label in labels:
                self._file.write(b' ' + label.encode() + b',')
            self._file.write(linesep)
        else:
            self._file.write(b'1,,' + linesep)
        if isinstance(message, email.message.Message):
            orig_buffer = io.BytesIO()
            orig_generator = email.generator.BytesGenerator(orig_buffer, False, 0)
            orig_generator.flatten(message)
            orig_buffer.seek(0)
            # Original headers, up to and including the blank line.
            while True:
                line = orig_buffer.readline()
                self._file.write(line.replace(b'\n', linesep))
                if line == b'\n' or not line:
                    break
            self._file.write(b'*** EOOH ***' + linesep)
            if isinstance(message, BabylMessage):
                vis_buffer = io.BytesIO()
                vis_generator = email.generator.BytesGenerator(vis_buffer, False, 0)
                vis_generator.flatten(message.get_visible())
                # Rewind before reading back; without this readline()
                # starts at the end of the buffer and the visible headers
                # are never written.
                vis_buffer.seek(0)
                while True:
                    line = vis_buffer.readline()
                    self._file.write(line.replace(b'\n', linesep))
                    if line == b'\n' or not line:
                        break
            else:
                # No separate visible headers: repeat the original ones.
                orig_buffer.seek(0)
                while True:
                    line = orig_buffer.readline()
                    self._file.write(line.replace(b'\n', linesep))
                    if line == b'\n' or not line:
                        break
            # orig_buffer is now positioned at the body.
            while True:
                buffer = orig_buffer.read(4096) # Buffer size is arbitrary.
                if not buffer:
                    break
                self._file.write(buffer.replace(b'\n', linesep))
        elif isinstance(message, (bytes, str, io.StringIO)):
            if isinstance(message, io.StringIO):
                warnings.warn("Use of StringIO input is deprecated, "
                    "use BytesIO instead", DeprecationWarning, 3)
                message = message.getvalue()
            if isinstance(message, str):
                message = self._string_to_bytes(message)
            body_start = message.find(b'\n\n') + 2
            if body_start - 2 != -1:
                self._file.write(message[:body_start].replace(b'\n', linesep))
                self._file.write(b'*** EOOH ***' + linesep)
                self._file.write(message[:body_start].replace(b'\n', linesep))
                self._file.write(message[body_start:].replace(b'\n', linesep))
            else:
                # No blank line found: treat the whole input as the body.
                self._file.write(b'*** EOOH ***' + linesep + linesep)
                self._file.write(message.replace(b'\n', linesep))
        elif hasattr(message, 'readline'):
            if hasattr(message, 'buffer'):
                warnings.warn("Use of text mode files is deprecated, "
                    "use a binary mode file instead", DeprecationWarning, 3)
                message = message.buffer
            original_pos = message.tell()
            # The headers are read twice: the first pass writes the
            # original headers, the second pass writes the visible copy.
            first_pass = True
            while True:
                line = message.readline()
                # Universal newline support.
                if line.endswith(b'\r\n'):
                    line = line[:-2] + b'\n'
                elif line.endswith(b'\r'):
                    line = line[:-1] + b'\n'
                self._file.write(line.replace(b'\n', linesep))
                if line == b'\n' or not line:
                    if first_pass:
                        first_pass = False
                        # The marker belongs only between the two header
                        # copies; writing it again after the second pass
                        # would put it at the start of the body.
                        self._file.write(b'*** EOOH ***' + linesep)
                        message.seek(original_pos)
                    else:
                        break
            while True:
                buffer = message.read(4096)     # Buffer size is arbitrary.
                if not buffer:
                    break
                self._file.write(buffer.replace(b'\n', linesep))
        else:
            raise TypeError('Invalid message type: %s' % type(message))
        stop = self._file.tell()
        return (start, stop)


class Message(email.message.Message):
    """Message with mailbox-format-specific properties."""

    def __init__(self, message=None):
        """Initialize a Message instance.

        message may be another email message (which is deep-copied), bytes,
        str, a text-mode file, any other object with a read() method
        (treated as a binary file), or None for an empty message.
        TypeError is raised for anything else.
        """
        if isinstance(message, email.message.Message):
            self._become_message(copy.deepcopy(message))
            if isinstance(message, Message):
                # Let the source translate its format-specific state.
                message._explain_to(self)
        elif isinstance(message, bytes):
            self._become_message(email.message_from_bytes(message))
        elif isinstance(message, str):
            self._become_message(email.message_from_string(message))
        elif isinstance(message, io.TextIOWrapper):
            self._become_message(email.message_from_file(message))
        elif hasattr(message, "read"):
            self._become_message(email.message_from_binary_file(message))
        elif message is None:
            email.message.Message.__init__(self)
        else:
            raise TypeError('Invalid message type: %s' % type(message))

    def _become_message(self, message):
        """Assume the non-format-specific state of message."""
        source_state = message.__dict__
        for name in ('_headers', '_unixfrom', '_payload', '_charset',
                     'preamble', 'epilogue', 'defects', '_default_type'):
            self.__dict__[name] = source_state[name]
        # Newer email.message.Message objects also carry a 'policy'
        # attribute that header access relies on; copy it when present.
        if 'policy' in source_state:
            self.__dict__['policy'] = source_state['policy']

    def _explain_to(self, message):
        """Copy format-specific state to message insofar as possible."""
        if isinstance(message, Message):
            return  # There's nothing format-specific to explain.
        else:
            raise TypeError('Cannot convert to specified type')


class MaildirMessage(Message):
    """Message with Maildir-specific properties."""

    def __init__(self, message=None):
        """Initialize a MaildirMessage instance.

        Defaults are subdir 'new', empty info, and the current time as
        the delivery date; they are set before the base initializer runs.
        """
        self._subdir = 'new'
        self._info = ''
        self._date = time.time()
        Message.__init__(self, message)

    def get_subdir(self):
        """Return 'new' or 'cur'."""
        return self._subdir

    def set_subdir(self, subdir):
        """Set subdir to 'new' or 'cur'; raise ValueError otherwise."""
        if subdir == 'new' or subdir == 'cur':
            self._subdir = subdir
        else:
            raise ValueError("subdir must be 'new' or 'cur': %s" % subdir)

    def get_flags(self):
        """Return as a string the flags that are set.

        Flags are the part of info after a leading '2,'; any other info
        yields an empty string.
        """
        if self._info.startswith('2,'):
            return self._info[2:]
        else:
            return ''

    def set_flags(self, flags):
        """Set the given flags and unset all others."""
        self._info = '2,' + ''.join(sorted(flags))

    def add_flag(self, flag):
        """Set the given flag(s) without changing others."""
        self.set_flags(''.join(set(self.get_flags()) | set(flag)))

    def remove_flag(self, flag):
        """Unset the given string flag(s) without changing others."""
        # With no flags set there is nothing to remove, and info is left
        # untouched.
        if self.get_flags():
            self.set_flags(''.join(set(self.get_flags()) - set(flag)))

    def get_date(self):
        """Return delivery date of message, in seconds since the epoch."""
        return self._date

    def set_date(self, date):
        """Set delivery date of message, in seconds since the epoch.

        TypeError is raised when date cannot be converted to a float.
        """
        try:
            self._date = float(date)
        except ValueError:
            raise TypeError("can't convert to float: %s" % date)

    def get_info(self):
        """Get the message's "info" as a string."""
        return self._info

    def set_info(self, info):
        """Set the message's "info" string; raise TypeError if not a str."""
        if isinstance(info, str):
            self._info = info
        else:
            raise TypeError('info must be a string: %s' % type(info))

    def _explain_to(self, message):
        """Copy Maildir-specific state to message insofar as possible.

        Maildir flags and the subdir are translated into the flags,
        sequences, or labels of the target message type.  TypeError is
        raised when message is not one of the mailbox message classes.
        """
        if isinstance(message, MaildirMessage):
            message.set_flags(self.get_flags())
            message.set_subdir(self.get_subdir())
            message.set_date(self.get_date())
        elif isinstance(message, _mboxMMDFMessage):
            flags = set(self.get_flags())
            if 'S' in flags:
                message.add_flag('R')
            if self.get_subdir() == 'cur':
                message.add_flag('O')
            if 'T' in flags:
                message.add_flag('D')
            if 'F' in flags:
                message.add_flag('F')
            if 'R' in flags:
                message.add_flag('A')
            message.set_from('MAILER-DAEMON', time.gmtime(self.get_date()))
        elif isinstance(message, MHMessage):
            flags = set(self.get_flags())
            if 'S' not in flags:
                message.add_sequence('unseen')
            if 'R' in flags:
                message.add_sequence('replied')
            if 'F' in flags:
                message.add_sequence('flagged')
        elif isinstance(message, BabylMessage):
            flags = set(self.get_flags())
            if 'S' not in flags:
                message.add_label('unseen')
            if 'T' in flags:
                message.add_label('deleted')
            if 'R' in flags:
                message.add_label('answered')
            if 'P' in flags:
                message.add_label('forwarded')
        elif isinstance(message, Message):
            pass
        else:
            raise TypeError('Cannot convert to specified type: %s' %
                            type(message))


class _mboxMMDFMessage(Message):
    """Message with mbox- or MMDF-specific properties.

    Holds the text of the "From " line in self._from and keeps flags in the
    'Status' and 'X-Status' headers.
    """

    def __init__(self, message=None):
        """Initialize an mboxMMDFMessage instance.

        The "From " line starts out as 'MAILER-DAEMON' followed by the
        current time.  If message is an email.message.Message whose unixfrom
        begins with 'From ', the rest of that line is used instead.
        """
        self.set_from('MAILER-DAEMON', True)
        if isinstance(message, email.message.Message):
            unixfrom = message.get_unixfrom()
            if unixfrom is not None and unixfrom.startswith('From '):
                # Drop the leading 'From ' (5 characters).
                self.set_from(unixfrom[5:])
        Message.__init__(self, message)

    def get_from(self):
        """Return contents of "From " line."""
        return self._from

    def set_from(self, from_, time_=None):
        """Set "From " line, formatting and appending time_ if specified.

        time_ may be a value accepted by time.asctime(), or True to use the
        current time as returned by time.gmtime().
        """
        if time_ is not None:
            if time_ is True:
                time_ = time.gmtime()
            from_ += ' ' + time.asctime(time_)
        self._from = from_

    def get_flags(self):
        """Return as a string the flags that are set.

        The result is the 'Status' header value followed by the 'X-Status'
        header value, each treated as '' when absent.
        """
        return self.get('Status', '') + self.get('X-Status', '')

    def set_flags(self, flags):
        """Set the given flags and unset all others.

        'R' and 'O' are written to the 'Status' header.  'D', 'F' and 'A',
        followed by any remaining flags in sorted order, are written to the
        'X-Status' header.
        """
        flags = set(flags)
        status_flags, xstatus_flags = '', ''
        for flag in ('R', 'O'):
            if flag in flags:
                status_flags += flag
                flags.remove(flag)
        for flag in ('D', 'F', 'A'):
            if flag in flags:
                xstatus_flags += flag
                flags.remove(flag)
        xstatus_flags += ''.join(sorted(flags))
        # replace_header() raises KeyError when the header is missing, in
        # which case the header is added instead.
        try:
            self.replace_header('Status', status_flags)
        except KeyError:
            self.add_header('Status', status_flags)
        try:
            self.replace_header('X-Status', xstatus_flags)
        except KeyError:
            self.add_header('X-Status', xstatus_flags)

    def add_flag(self, flag):
        """Set the given flag(s) without changing others."""
        self.set_flags(''.join(set(self.get_flags()) | set(flag)))

    def remove_flag(self, flag):
        """Unset the given string flag(s) without changing others."""
        # With neither header present there is nothing to unset, and no
        # headers are created.
        if 'Status' in self or 'X-Status' in self:
            self.set_flags(''.join(set(self.get_flags()) - set(flag)))

    def _explain_to(self, message):
        """Copy mbox- or MMDF-specific state to message insofar as possible.

        Raises TypeError if message is not a Message instance.
        """
        if isinstance(message, MaildirMessage):
            flags = set(self.get_flags())
            if 'O' in flags:
                message.set_subdir('cur')
            if 'F' in flags:
                message.add_flag('F')
            if 'A' in flags:
                message.add_flag('R')
            if 'R' in flags:
                message.add_flag('S')
            if 'D' in flags:
                message.add_flag('T')
            del message['status']
            del message['x-status']
            # Treat the last five whitespace-separated fields of the "From "
            # line as a date; if they do not parse, the date is left alone.
            maybe_date = ' '.join(self.get_from().split()[-5:])
            try:
                message.set_date(calendar.timegm(time.strptime(maybe_date,
                                                      '%a %b %d %H:%M:%S %Y')))
            except (ValueError, OverflowError):
                pass
        elif isinstance(message, _mboxMMDFMessage):
            message.set_flags(self.get_flags())
            message.set_from(self.get_from())
        elif isinstance(message, MHMessage):
            flags = set(self.get_flags())
            if 'R' not in flags:
                message.add_sequence('unseen')
            if 'A' in flags:
                message.add_sequence('replied')
            if 'F' in flags:
                message.add_sequence('flagged')
            del message['status']
            del message['x-status']
        elif isinstance(message, BabylMessage):
            flags = set(self.get_flags())
            if 'R' not in flags:
                message.add_label('unseen')
            if 'D' in flags:
                message.add_label('deleted')
            if 'A' in flags:
                message.add_label('answered')
            del message['status']
            del message['x-status']
        elif isinstance(message, Message):
            pass
        else:
            raise TypeError('Cannot convert to specified type: %s' %
                            type(message))


class mboxMessage(_mboxMMDFMessage):
    """Message with mbox-specific properties.

    Everything is inherited from _mboxMMDFMessage; nothing is added here.
    """


class MHMessage(Message):
    """Message with MH-specific properties.

    Tracks, in self._sequences, the names of the sequences that include the
    message.
    """

    def __init__(self, message=None):
        """Initialize an MHMessage instance."""
        self._sequences = []
        Message.__init__(self, message)

    def get_sequences(self):
        """Return a list of sequences that include the message.

        The list is a copy; changing it does not affect the message.
        """
        return self._sequences[:]

    def set_sequences(self, sequences):
        """Set the list of sequences that include the message."""
        self._sequences = list(sequences)

    def add_sequence(self, sequence):
        """Add sequence to list of sequences including the message.

        A sequence that is already present is not added again.  Raises
        TypeError if sequence is not a str.
        """
        if isinstance(sequence, str):
            if sequence not in self._sequences:
                self._sequences.append(sequence)
        else:
            raise TypeError('sequence type must be str: %s' % type(sequence))

    def remove_sequence(self, sequence):
        """Remove sequence from the list of sequences including the message.

        A sequence that is not in the list is silently ignored.
        """
        try:
            self._sequences.remove(sequence)
        except ValueError:
            pass

    def _explain_to(self, message):
        """Copy MH-specific state to message insofar as possible.

        Raises TypeError if message is not a Message instance.
        """
        if isinstance(message, MaildirMessage):
            sequences = set(self.get_sequences())
            # The subdir is 'cur' whether or not the message is unseen; only
            # the 'S' flag depends on the 'unseen' sequence.
            message.set_subdir('cur')
            if 'unseen' not in sequences:
                message.add_flag('S')
            if 'flagged' in sequences:
                message.add_flag('F')
            if 'replied' in sequences:
                message.add_flag('R')
        elif isinstance(message, _mboxMMDFMessage):
            sequences = set(self.get_sequences())
            if 'unseen' not in sequences:
                message.add_flag('RO')
            else:
                message.add_flag('O')
            if 'flagged' in sequences:
                message.add_flag('F')
            if 'replied' in sequences:
                message.add_flag('A')
        elif isinstance(message, MHMessage):
            for sequence in self.get_sequences():
                message.add_sequence(sequence)
        elif isinstance(message, BabylMessage):
            sequences = set(self.get_sequences())
            if 'unseen' in sequences:
                message.add_label('unseen')
            if 'replied' in sequences:
                message.add_label('answered')
        elif isinstance(message, Message):
            pass
        else:
            raise TypeError('Cannot convert to specified type: %s' %
                            type(message))


class BabylMessage(Message):
    """Message carrying Babyl-specific state: labels and visible headers."""

    def __init__(self, message=None):
        """Set up empty labels and visible headers, then initialize."""
        self._labels = []
        self._visible = Message()
        Message.__init__(self, message)

    def get_labels(self):
        """Return a copy of the list of labels on the message."""
        return list(self._labels)

    def set_labels(self, labels):
        """Replace the labels on the message with those in labels."""
        self._labels = list(labels)

    def add_label(self, label):
        """Append label unless it is already present.

        Raises TypeError if label is not a str.
        """
        if not isinstance(label, str):
            raise TypeError('label must be a string: %s' % type(label))
        if label not in self._labels:
            self._labels.append(label)

    def remove_label(self, label):
        """Drop label from the message; an absent label is ignored."""
        if label in self._labels:
            self._labels.remove(label)

    def get_visible(self):
        """Return a new Message built from the visible headers."""
        return Message(self._visible)

    def set_visible(self, visible):
        """Store a new Message built from visible as the visible headers."""
        self._visible = Message(visible)

    def update_visible(self):
        """Update and/or sensibly generate a set of visible headers.

        Visible headers that still exist on the message are refreshed from
        it and the others are dropped; then a fixed set of common headers
        is copied over when present on the message but not yet visible.
        """
        visible = self._visible
        for name in visible.keys():
            if name not in self:
                del visible[name]
                continue
            visible.replace_header(name, self[name])
        for name in ('Date', 'From', 'Reply-To', 'To', 'CC', 'Subject'):
            if name in self and name not in visible:
                visible[name] = self[name]

    def _explain_to(self, message):
        """Copy Babyl-specific state to message insofar as possible.

        Raises TypeError if message is not a Message instance.
        """
        if isinstance(message, MaildirMessage):
            own_labels = set(self.get_labels())
            # 'cur' is used in both cases; only the 'S' flag depends on the
            # 'unseen' label.
            message.set_subdir('cur')
            if 'unseen' not in own_labels:
                message.add_flag('S')
            if own_labels & {'forwarded', 'resent'}:
                message.add_flag('P')
            for label, flag in (('answered', 'R'), ('deleted', 'T')):
                if label in own_labels:
                    message.add_flag(flag)
            return
        if isinstance(message, _mboxMMDFMessage):
            own_labels = set(self.get_labels())
            message.add_flag('O' if 'unseen' in own_labels else 'RO')
            for label, flag in (('deleted', 'D'), ('answered', 'A')):
                if label in own_labels:
                    message.add_flag(flag)
            return
        if isinstance(message, MHMessage):
            own_labels = set(self.get_labels())
            for label, sequence in (('unseen', 'unseen'),
                                    ('answered', 'replied')):
                if label in own_labels:
                    message.add_sequence(sequence)
            return
        if isinstance(message, BabylMessage):
            message.set_visible(self.get_visible())
            for label in self.get_labels():
                message.add_label(label)
            return
        if not isinstance(message, Message):
            raise TypeError('Cannot convert to specified type: %s' %
                            type(message))


class MMDFMessage(_mboxMMDFMessage):
    """Message with MMDF-specific properties.

    Everything is inherited from _mboxMMDFMessage; nothing is added here.
    """


class _ProxyFile:
    """A read-only wrapper of a file.

    The proxy tracks its own offset in self._pos: every read first seeks the
    wrapped file to that offset and afterwards records where the read ended.
    """

    def __init__(self, f, pos=None):
        """Initialize a _ProxyFile.

        f is the file object to wrap.  pos is the initial offset; when it is
        None, f's current position is used.
        """
        self._file = f
        if pos is None:
            self._pos = f.tell()
        else:
            self._pos = pos

    def read(self, size=None):
        """Read bytes."""
        return self._read(size, self._file.read)

    def read1(self, size=None):
        """Read bytes."""
        return self._read(size, self._file.read1)

    def readline(self, size=None):
        """Read a line."""
        return self._read(size, self._file.readline)

    def readlines(self, sizehint=None):
        """Read multiple lines.

        When sizehint is given, reading stops after the line that brings the
        total length of the collected lines up to at least sizehint.
        """
        result = []
        for line in self:
            result.append(line)
            if sizehint is not None:
                sizehint -= len(line)
                if sizehint <= 0:
                    break
        return result

    def __iter__(self):
        """Iterate over lines."""
        while True:
            line = self.readline()
            if not line:
                # End the generator with a plain return: raising
                # StopIteration inside a generator is turned into
                # RuntimeError under PEP 479.
                return
            yield line

    def tell(self):
        """Return the position."""
        return self._pos

    def seek(self, offset, whence=0):
        """Change position."""
        if whence == 1:
            # A relative seek is relative to the proxy's own offset, so move
            # the wrapped file there first.
            self._file.seek(self._pos)
        self._file.seek(offset, whence)
        self._pos = self._file.tell()

    def close(self):
        """Close the file."""
        if hasattr(self, '_file'):
            if hasattr(self._file, 'close'):
                self._file.close()
            # Dropping the attribute is what marks the proxy as closed.
            del self._file

    def _read(self, size, read_method):
        """Read size bytes using read_method.

        A size of None is passed to read_method as -1.
        """
        if size is None:
            size = -1
        self._file.seek(self._pos)
        result = read_method(size)
        self._pos = self._file.tell()
        return result

    def __enter__(self):
        """Context manager protocol support."""
        return self

    def __exit__(self, *exc):
        """Close the proxy when the with block ends."""
        self.close()

    def readable(self):
        """Return the wrapped file's readable() result."""
        return self._file.readable()

    def writable(self):
        """Return the wrapped file's writable() result."""
        return self._file.writable()

    def seekable(self):
        """Return the wrapped file's seekable() result."""
        return self._file.seekable()

    def flush(self):
        """Flush the wrapped file."""
        return self._file.flush()

    @property
    def closed(self):
        """True once close() has run or the wrapped file reports closed."""
        if not hasattr(self, '_file'):
            return True
        if not hasattr(self._file, 'closed'):
            return False
        return self._file.closed


class _PartialFile(_ProxyFile):
    """A read-only wrapper of part of a file.

    Positions reported by tell() and accepted by seek() are relative to
    start, and reads are clamped so they do not go past stop.
    """

    def __init__(self, f, start=None, stop=None):
        """Initialize a _PartialFile.

        start and stop are offsets into f that bound the readable region.
        Although both default to None, tell(), seek() and _read() use them
        in arithmetic, so both need to be numbers.
        """
        _ProxyFile.__init__(self, f, start)
        self._start = start
        self._stop = stop

    def tell(self):
        """Return the position with respect to start."""
        return _ProxyFile.tell(self) - self._start

    def seek(self, offset, whence=0):
        """Change position, possibly with respect to start or stop."""
        # Absolute and from-end seeks are rewritten as relative seeks from
        # start or stop respectively; _ProxyFile.seek() then applies offset
        # relative to self._pos.
        if whence == 0:
            self._pos = self._start
            whence = 1
        elif whence == 2:
            self._pos = self._stop
            whence = 1
        _ProxyFile.seek(self, offset, whence)

    def _read(self, size, read_method):
        """Read size bytes using read_method, honoring start and stop."""
        remaining = self._stop - self._pos
        if remaining <= 0:
            return b''
        if size is None or size < 0 or size > remaining:
            size = remaining
        return _ProxyFile._read(self, size, read_method)

    def close(self):
        """Detach from the underlying file without closing it."""
        # do *not* close the underlying file object for partial files,
        # since it's global to the mailbox object
        if hasattr(self, '_file'):
            del self._file


def _lock_file(f, dotlock=True):
    """Lock file f using lockf and dot locking.

    When fcntl is available, a non-blocking exclusive lockf() lock is taken
    on f.  When dotlock is true, the file f.name + '.lock' is additionally
    created from a temporary file by linking (or by renaming when os.link
    is missing).  If the temporary file cannot be created for lack of write
    access (EACCES or EROFS), dot locking is skipped.

    Raises ExternalClashError when the lockf() call fails with EAGAIN,
    EACCES or EROFS, or when the dot lock file already exists.  On any
    failure, the lockf lock is released and a dot lock created here is
    removed before the exception propagates.
    """
    dotlock_done = False
    try:
        if fcntl:
            try:
                fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError as e:
                if e.errno in (errno.EAGAIN, errno.EACCES, errno.EROFS):
                    raise ExternalClashError('lockf: lock unavailable: %s' %
                                             f.name)
                else:
                    raise
        if dotlock:
            try:
                pre_lock = _create_temporary(f.name + '.lock')
                pre_lock.close()
            except IOError as e:
                if e.errno in (errno.EACCES, errno.EROFS):
                    return  # Without write access, just skip dotlocking.
                else:
                    raise
            try:
                if hasattr(os, 'link'):
                    os.link(pre_lock.name, f.name + '.lock')
                    dotlock_done = True
                    os.unlink(pre_lock.name)
                else:
                    os.rename(pre_lock.name, f.name + '.lock')
                    dotlock_done = True
            except OSError as e:
                if e.errno == errno.EEXIST or \
                  (os.name == 'os2' and e.errno == errno.EACCES):
                    # The lock file already exists: discard the temporary
                    # file and report the clash.
                    os.remove(pre_lock.name)
                    raise ExternalClashError('dot lock unavailable: %s' %
                                             f.name)
                else:
                    raise
    except BaseException:
        # Undo whatever was acquired above, then let the error propagate.
        if fcntl:
            fcntl.lockf(f, fcntl.LOCK_UN)
        if dotlock_done:
            os.remove(f.name + '.lock')
        raise

def _unlock_file(f):
    """Unlock file f using lockf and dot locking.

    Releases the lockf() lock when fcntl is available, then removes
    f.name + '.lock'.  A lock file that does not exist is ignored; any
    other removal error propagates.
    """
    if fcntl:
        fcntl.lockf(f, fcntl.LOCK_UN)
    # Remove first and tolerate ENOENT, rather than testing for existence
    # beforehand: the lock file could vanish between the test and the
    # removal.
    try:
        os.remove(f.name + '.lock')
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def _create_carefully(path):
    """Create path, failing if it exists, and open it for reading and writing.

    The file is created with os.open() using O_CREAT | O_EXCL, so an
    existing path makes the call raise OSError.  Returns a binary file
    object opened on path in 'rb+' mode.
    """
    fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0o666)
    try:
        # Opening by path gives a file object whose .name is the path;
        # _lock_file() passes that .name to os.link() and os.remove().
        return open(path, 'rb+')
    finally:
        # The descriptor only served to create the file exclusively.
        os.close(fd)

def _create_temporary(path):
    """Create and open a temporary file whose name is derived from path.

    The name is path followed by the current time in whole seconds, the
    host name and the process id, all joined with dots.  The file itself is
    created and opened by _create_carefully().
    """
    suffix_parts = (int(time.time()), socket.gethostname(), os.getpid())
    temp_path = '.'.join(str(part) for part in (path,) + suffix_parts)
    return _create_carefully(temp_path)

def _sync_flush(f):
    """Ensure changes to file f are physically on disk.

    Flushes f's buffers and, where the platform provides os.fsync(), calls
    it on f's file descriptor.
    """
    f.flush()
    if hasattr(os, 'fsync'):
        os.fsync(f.fileno())

def _sync_close(f):
    """Close file f, ensuring all changes are physically on disk.

    f is closed even when syncing fails, so the handle is not leaked; the
    error from the failed sync still propagates.
    """
    try:
        _sync_flush(f)
    finally:
        f.close()

class Error(Exception):
    """Raised for module-specific errors.

    Base class of the other exception classes defined in this module.
    """

class NoSuchMailboxError(Error):
    """Raised when the requested mailbox is absent and will not be created."""

class NotEmptyError(Error):
    """Raised when deletion is requested for a mailbox that is not empty."""

class ExternalClashError(Error):
    """Raised when an action fails because of another process."""

class FormatError(Error):
    """Raised when a file appears to have an invalid format."""