# base64mime.py
# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

# Public API of this module.  body_decode and decodestring are aliases of
# decode, kept for compatibility with the standard base64 module.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]


from base64 import b64encode
from binascii import b2a_base64, a2b_base64

# Line terminators.  NL is the default end-of-line for body_encode(); CRLF is
# the value to pass as eol when the result goes directly into an email.
CRLF = '\r\n'
NL = '\n'
# Separator used to join the encoded lines back into one string.
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): not referenced in this module; presumably the count of fixed
# characters in the "=?charset?b?...?=" encoded-word wrapper -- confirm
# against Charset.py.
MISC_LEN = 7



# Helpers
def header_length(bytearray):
    """Return the length of *bytearray* once it is encoded with base64.

    Base64 emits four output characters for every group of three input
    bytes; a trailing partial group of one or two bytes is padded out to a
    full four characters.  Only ``len(bytearray)`` is consulted, so any
    sized object may be passed.
    """
    # NOTE: the parameter name shadows the builtin ``bytearray``; it is kept
    # because callers may pass it by keyword.
    full_groups, remainder = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    return 4 * (full_groups + (1 if remainder else 0))



def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the header value to encode.  A str is first converted to
    bytes using charset; a bytes object is encoded as is.  An empty (falsy)
    value yields the empty string.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    The result is a str of the form ``=?charset?b?<base64 data>?=``.
    """
    if not header_bytes:
        return ""
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    # b64encode() returns bytes; its output is pure ASCII by construction.
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)
73 74 75



def body_encode(s, maxlinelen=76, eol=NL):
    """Encode a bytes object with base64, returning a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty (falsy) s yields the empty string.
    """
    if not s:
        # Return str for empty input too, so the result type does not depend
        # on whether the caller passed b'' or real data.
        return ""

    encvec = []
    # Every 3 input bytes become 4 output characters, so this many input
    # bytes fit on one output line.
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        # Swap the trailing newline for the requested line ending.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)



def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes object.  An empty (falsy) value yields
    empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    if isinstance(string, str):
        # a2b_base64() is given bytes, so text input is converted first.
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)


# Alternate names for decode(), provided for convenience and for backwards
# compatibility with the standard base64 module.
body_decode = decodestring = decode