Oops. I copied a slightly older version of the email package from the sandbox.

This should restore the email package in the py3k branch to exactly what's in the sandbox. This wipes out 1-2 fixes made post-copy, which I'll re-apply shortly.

Oops. I copied a slightly older version of the email package from the sandbox.
This should restore the email package in the py3k branch to exactly what's in the sandbox. This wipes out 1-2 fixes made post-copy, which I'll re-apply shortly.
9604e666 · Guido van Rossum · 2c440a10 · 9604e666 · 9604e666 · 9604e666
Commit 9604e666 authored Aug 30, 2007 by Guido van Rossum
8 changed files
--- a/Lib/email/base64mime.py
+++ b/Lib/email/base64mime.py
@@ -25,7 +25,6 @@ module.
 """
 __all__ = [
-    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
@@ -33,12 +32,13 @@ __all__ = [
    'encode',
    'encodestring',
    'header_encode',
+    'header_length',
    ]
 import re
+from base64 import b64encode
 from binascii import b2a_base64, a2b_base64
-from email.utils import fix_eols
 CRLF = '\r\n'
 NL = '\n'
@@ -50,11 +50,10 @@ MISC_LEN = 7
 # Helpers
-def base64_len(s):
+def header_length(bytearray):
    """Return the length of s when it is encoded with base64."""
-    groups_of_3, leftover = divmod(len(s), 3)
+    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
-    # Thanks, Tim!
    n = groups_of_3 * 4
    if leftover:
        n += 4
@@ -62,74 +61,26 @@ def base64_len(s):
-def header_encode(header, charset='iso-8859-1', keep_eols=False,
+def header_encode(header_bytes, charset='iso-8859-1'):
-                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.
-    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
-    encoding, except that each line must be intelligently wrapped (respecting
-    the Base64 encoding), and subsequent lines must start with a space.
    charset names the character set to use to encode the header.  It defaults
-    to iso-8859-1.
+    to iso-8859-1.  Base64 encoding is defined in RFC 2045.
-    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
-    to the canonical email line separator \\r\\n unless the keep_eols
-    parameter is True (the default is False).
-    Each line of the header will be terminated in the value of eol, which
-    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
-    this function directly in email.
-    The resulting string will be in the form:
-    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
-      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
-    with each line wrapped at, at most, maxlinelen characters (defaults to 76
-    characters).
    """
    # Return empty headers unchanged
-    if not header:
+    if not header_bytes:
-        return header
+        return str(header_bytes)
+    encoded = b64encode(header_bytes)
-    if not keep_eols:
+    return '=?%s?b?%s?=' % (charset, encoded)
-        header = fix_eols(header)
-    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
-    # length, after the RFC chrome is added in.
-    base64ed = []
-    max_encoded = maxlinelen - len(charset) - MISC_LEN
-    max_unencoded = max_encoded * 3 // 4
-    for i in range(0, len(header), max_unencoded):
-        base64ed.append(b2a_base64(header[i:i+max_unencoded]))
-    # Now add the RFC chrome to each encoded chunk
-    lines = []
-    for line in base64ed:
-        # Ignore the last character of each line if it is a newline
-        if line[-1] == ord(NL):
-            line = line[:-1]
-        # Add the chrome
-        lines.append('=?%s?b?%s?=' % (charset, line))
-    # Glue the lines together and return it.  BAW: should we be able to
-    # specify the leading whitespace in the joiner?
-    joiner = eol + ' '
-    return joiner.join(lines)
-def encode(s, binary=True, maxlinelen=76, eol=NL):
+def body_encode(s, maxlinelen=76, eol=NL):
    """Encode a string with base64.
    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).
-    If binary is False, end-of-line characters will be converted to the
-    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
-    verbatim (this is the default).
    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.
@@ -137,9 +88,6 @@ def encode(s, binary=True, maxlinelen=76, eol=NL):
    if not s:
        return s
-    if not binary:
-        s = fix_eols(s)
    encvec = []
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
@@ -152,25 +100,26 @@ def encode(s, binary=True, maxlinelen=76, eol=NL):
    return EMPTYSTRING.join(encvec)
-# For convenience and backwards compatibility w/ standard base64 module
-body_encode = encode
-encodestring = encode
-def decode(string):
+def decode(s, convert_eols=False):
    """Decode a raw base64 string, returning a bytes object.
-    This function does not parse a full MIME header value encoded with base64
+    If convert_eols is set to a string value, all canonical email linefeeds,
-    (like =?iso-8895-1?b?bmloISBuaWgh?=) -- use the high level
+    e.g. "\\r\\n", in the decoded text will be converted to the value of
-    email.Header class for that functionality.
+    convert_eols.  os.linesep is a good choice for convert_eols if you are
+    decoding a text attachment.
+    This function does not parse a full MIME header value encoded with
+    base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
+    level email.Header class for that functionality.
    """
-    if not string:
+    if not s:
-        return bytes()
+        return s
-    elif isinstance(string, str):
-        return a2b_base64(string.encode('raw-unicode-escape'))
+    dec = a2b_base64(s)
-    else:
+    if convert_eols:
-        return a2b_base64(string)
+        return dec.replace(CRLF, convert_eols)
+    return dec
 # For convenience and backwards compatibility w/ standard base64 module

--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -133,12 +133,8 @@ class Generator:
    def _write_headers(self, msg):
        for h, v in msg.items():
            print('%s:' % h, end=' ', file=self._fp)
-            if self._maxheaderlen == 0:
+            if isinstance(v, Header):
-                # Explicit no-wrapping
+                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
-                print(v, file=self._fp)
-            elif isinstance(v, Header):
-                # Header instances know what to do
-                print(v.encode(), file=self._fp)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,

--- a/Lib/email/header.py
+++ b/Lib/email/header.py
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -13,9 +13,9 @@ import warnings
 from io import BytesIO, StringIO
 # Intrapackage imports
-import email.charset
 from email import utils
 from email import errors
+from email.charset import Charset
 SEMISPACE = '; '
@@ -201,7 +201,7 @@ class Message:
                # Incorrect padding
                pass
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
-            in_file = BytesIO((payload + '\n').encode('raw-unicode-escape'))
+            in_file = BytesIO(bytes(payload + '\n'))
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
@@ -211,7 +211,7 @@ class Message:
                pass
        # Is there a better way to do this?  We can't use the bytes
        # constructor.
-        return bytes(ord(c) for c in payload)
+        return bytes(payload, 'raw-unicode-escape')
    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.
@@ -236,18 +236,13 @@ class Message:
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
-        if isinstance(charset, basestring):
+        if not isinstance(charset, Charset):
-            charset = email.charset.Charset(charset)
+            charset = Charset(charset)
-        if not isinstance(charset, email.charset.Charset):
-            raise TypeError(charset)
-        # BAW: should we accept strings that can serve as arguments to the
-        # Charset constructor?
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
@@ -256,7 +251,7 @@ class Message:
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
-        if str(charset) != charset.get_output_charset():
+        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
@@ -757,8 +752,7 @@ class Message:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
-                as_bytes = charset[2].encode('raw-unicode-escape')
+                charset = str(bytes(charset[2]), pcharset)
-                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range

--- a/Lib/email/quoprimime.py
+++ b/Lib/email/quoprimime.py
@@ -29,16 +29,14 @@ wrapping issues, use the email.Header module.
 __all__ = [
    'body_decode',
    'body_encode',
-    'body_quopri_check',
+    'body_length',
-    'body_quopri_len',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_decode',
    'header_encode',
-    'header_quopri_check',
+    'header_length',
-    'header_quopri_len',
    'quote',
    'unquote',
    ]
@@ -46,54 +44,65 @@ __all__ = [
 import re
 from string import ascii_letters, digits, hexdigits
-from email.utils import fix_eols
 CRLF = '\r\n'
 NL = '\n'
 EMPTYSTRING = ''
-# See also Charset.py
+# Build a mapping of octets to the expansion of that octet.  Since we're only
-MISC_LEN = 7
+# going to have 256 of these things, this isn't terribly inefficient
+# space-wise.  Remember that headers and bodies have different sets of safe
+# characters.  Initialize both maps with the full expansion, and then override
+# the safe bytes with the more compact form.
+_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
+_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
-HEADER_SAFE_BYTES = (b'-!*+/ ' +
+# Safe header bytes which need no encoding.
-                     ascii_letters.encode('raw-unicode-escape') +
+for c in b'-!*+/' + bytes(ascii_letters) + bytes(digits):
-                     digits.encode('raw-unicode-escape'))
+    _QUOPRI_HEADER_MAP[c] = chr(c)
+# Headers have one other special encoding; spaces become underscores.
+_QUOPRI_HEADER_MAP[ord(' ')] = '_'
-BODY_SAFE_BYTES   = (b' !"#$%&\'()*+,-./0123456789:;<>'
+# Safe body bytes which need no encoding.
-                     b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
-                     b'abcdefghijklmnopqrstuvwxyz{|}~\t')
+          b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+          b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
+    _QUOPRI_BODY_MAP[c] = chr(c)
 # Helpers
-def header_quopri_check(c):
+def header_check(octet):
-    """Return True if the character should be escaped with header quopri."""
+    """Return True if the octet should be escaped with header quopri."""
-    return c not in HEADER_SAFE_BYTES
+    return chr(octet) != _QUOPRI_HEADER_MAP[octet]
-def body_quopri_check(c):
+def body_check(octet):
-    """Return True if the character should be escaped with body quopri."""
+    """Return True if the octet should be escaped with body quopri."""
-    return c not in BODY_SAFE_BYTES
+    return chr(octet) != _QUOPRI_BODY_MAP[octet]
-def header_quopri_len(bytearray):
+def header_length(bytearray):
-    """Return the length of bytearray when it is encoded with header quopri.
+    """Return a header quoted-printable encoding length.
    Note that this does not include any RFC 2047 chrome added by
    `header_encode()`.
+    :param bytearray: An array of bytes (a.k.a. octets).
+    :return: The length in bytes of the byte array when it is encoded with
+        quoted-printable for headers.
    """
-    count = 0
+    return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
-    for c in bytearray:
-        count += (3 if header_quopri_check(c) else 1)
-    return count
+def body_length(bytearray):
+    """Return a body quoted-printable encoding length.
-def body_quopri_len(bytearray):
+    :param bytearray: An array of bytes (a.k.a. octets).
-    """Return the length of bytearray when it is encoded with body quopri."""
+    :return: The length in bytes of the byte array when it is encoded with
-    count = 0
+        quoted-printable for bodies.
-    for c in bytearray:
+    """
-        count += (3 if body_quopri_check(c) else 1)
+    return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
-    return count
 def _max_append(L, s, maxlen, extra=''):
@@ -133,29 +142,17 @@ def header_encode(header_bytes, charset='iso-8859-1'):
        return str(header_bytes)
    # Iterate over every byte, encoding if necessary.
    encoded = []
-    for character in header_bytes:
+    for octet in header_bytes:
-        # Space may be represented as _ instead of =20 for readability
+        encoded.append(_QUOPRI_HEADER_MAP[octet])
-        if character == ord(' '):
-            encoded.append('_')
-        # These characters can be included verbatim.
-        elif not header_quopri_check(character):
-            encoded.append(chr(character))
-        # Otherwise, replace with hex value like =E2
-        else:
-            encoded.append('=%02X' % character)
    # Now add the RFC chrome to each encoded chunk and glue the chunks
    # together.
    return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
-def encode(body, binary=False, maxlinelen=76, eol=NL):
+def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.
-    If binary is False (the default), end-of-line characters will be converted
-    to the canonical email end-of-line sequence \\r\\n.  Otherwise they will
-    be left verbatim.
    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.
@@ -168,9 +165,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
    if not body:
        return body
-    if not binary:
-        body = fix_eols(body)
    # BAW: We're accumulating the body text by string concatenation.  That
    # can't be very efficient, but I don't have time now to rewrite it.  It
    # just feels like this algorithm could be more efficient.
@@ -195,7 +189,7 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
        for j in range(linelen):
            c = line[j]
            prev = c
-            if body_quopri_check(c):
+            if body_check(c):
                c = quote(c)
            elif j+1 == linelen:
                # Check for whitespace at end of line; special case
@@ -231,11 +225,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
    return encoded_body
-# For convenience and backwards compatibility w/ standard base64 module
-body_encode = encode
-encodestring = encode
 # BAW: I'm not sure if the intent was for the signature of this function to be
 # the same as base64MIME.decode() or not...

--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -71,16 +71,6 @@ def _bdecode(s):
-def fix_eols(s):
-    """Replace all line-ending characters with \r\n."""
-    # Fix newlines with no preceding carriage return
-    s = re.sub(r'(?<!\r)\n', CRLF, s)
-    # Fix carriage returns with no following newline
-    s = re.sub(r'\r(?!\n)', CRLF, s)
-    return s
 def formataddr(pair):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
@@ -317,7 +307,7 @@ def collapse_rfc2231_value(value, errors='replace',
    # object.  We do not want bytes() normal utf-8 decoder, we want a straight
    # interpretation of the string as character bytes.
    charset, language, text = value
-    rawbytes = bytes(ord(c) for c in text)
+    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError: