# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

27 28 29 30 31 32 33 34 35 36 37 38
# Names exported by "from <this module> import *"; every entry is a function
# or alias defined at module level below.
__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]


39
from binascii import b2a_base64, a2b_base64
40
from email.utils import fix_eols
41 42 43 44 45 46 47 48 49 50 51 52 53

# Line-ending and joiner strings shared by the encoders and decoders below.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# Number of characters of encoded-word chrome that header_encode() wraps
# around each chunk, not counting the charset name: "=?" + "?b?" + "?=".
MISC_LEN = 7



# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64.

    Base64 emits one 4-character group for every 3 input bytes; a trailing
    partial group (1 or 2 leftover bytes) is padded out to a full 4
    characters.  Only the encoded characters are counted -- no line breaks.
    An empty s yields 0.
    """
    groups_of_3, leftover = divmod(len(s), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n



def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded word wrapped at, at most, maxlinelen characters
    (defaults to 76 characters).  The single space that starts each
    continuation line is added afterwards and is not counted in that limit.

    The header is cut into chunks at fixed offsets without looking at its
    content, so a multi-byte character may end up split across two lines.

    An empty header is returned unchanged.  ValueError is raised when
    maxlinelen is too small to hold the charset name, the encoded-word chrome
    and at least one 4-character Base64 group.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Characters left for Base64 text on each line once the RFC chrome
    # ("=?charset?b?" ... "?=") has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 output only comes in whole 4-character groups, each carrying 3
    # input bytes.  Rounding the budget down to whole groups keeps every
    # encoded chunk within max_encoded; a plain "* 3 // 4" can overshoot by
    # up to 3 characters because a partial group is padded to 4.
    max_unencoded = max_encoded // 4 * 3
    if max_unencoded <= 0:
        raise ValueError(
            'maxlinelen=%r leaves no room for Base64 data with charset %r'
            % (maxlinelen, charset))

    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # Ignore the last character of each chunk if it is a newline
        if chunk.endswith(NL):
            chunk = chunk[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)



def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.  ValueError is raised when maxlinelen
    is smaller than 4, since not even one Base64 group fits on a line.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 turns every 3 input bytes into one 4-character group.  Cutting
    # the input at a multiple of 3 bytes keeps each line within maxlinelen
    # and keeps "=" padding out of every line except possibly the last; a
    # plain "* 3 // 4" overshoots whenever maxlinelen is not a multiple of 4.
    max_unencoded = maxlinelen // 4 * 3
    if max_unencoded <= 0:
        raise ValueError('maxlinelen must be at least 4, got %r'
                         % (maxlinelen,))

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the trailing newline for the requested line ending.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module:
# body_encode and encodestring are plain aliases of encode().
body_encode = encode
encodestring = encode



def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    An empty (false) s is returned unchanged.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if convert_eols:
        # Only CRLF pairs are rewritten; other line endings are left alone.
        return dec.replace(CRLF, convert_eols)
    return dec


# For convenience and backwards compatibility w/ standard base64 module:
# body_decode and decodestring are plain aliases of decode().
body_decode = decode
decodestring = decode