Commit 9604e666, authored by Guido van Rossum

Oops. I copied a slightly older version of the email package from the sandbox.

This should restore the email package in the py3k branch to exactly what's in
the sandbox.

This wipes out 1-2 fixes made post-copy, which I'll re-apply shortly.
Parent: 2c440a10
...@@ -25,7 +25,6 @@ module. ...@@ -25,7 +25,6 @@ module.
""" """
__all__ = [ __all__ = [
'base64_len',
'body_decode', 'body_decode',
'body_encode', 'body_encode',
'decode', 'decode',
...@@ -33,12 +32,13 @@ __all__ = [ ...@@ -33,12 +32,13 @@ __all__ = [
'encode', 'encode',
'encodestring', 'encodestring',
'header_encode', 'header_encode',
'header_length',
] ]
import re import re
from base64 import b64encode
from binascii import b2a_base64, a2b_base64 from binascii import b2a_base64, a2b_base64
from email.utils import fix_eols
CRLF = '\r\n' CRLF = '\r\n'
NL = '\n' NL = '\n'
...@@ -50,11 +50,10 @@ MISC_LEN = 7 ...@@ -50,11 +50,10 @@ MISC_LEN = 7
# Helpers # Helpers
def base64_len(s): def header_length(bytearray):
"""Return the length of s when it is encoded with base64.""" """Return the length of s when it is encoded with base64."""
groups_of_3, leftover = divmod(len(s), 3) groups_of_3, leftover = divmod(len(bytearray), 3)
# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
# Thanks, Tim!
n = groups_of_3 * 4 n = groups_of_3 * 4
if leftover: if leftover:
n += 4 n += 4
...@@ -62,74 +61,26 @@ def base64_len(s): ...@@ -62,74 +61,26 @@ def base64_len(s):
def header_encode(header, charset='iso-8859-1', keep_eols=False, def header_encode(header_bytes, charset='iso-8859-1'):
maxlinelen=76, eol=NL):
"""Encode a single header line with Base64 encoding in a given charset. """Encode a single header line with Base64 encoding in a given charset.
Defined in RFC 2045, this Base64 encoding is identical to normal Base64
encoding, except that each line must be intelligently wrapped (respecting
the Base64 encoding), and subsequent lines must start with a space.
charset names the character set to use to encode the header. It defaults charset names the character set to use to encode the header. It defaults
to iso-8859-1. to iso-8859-1. Base64 encoding is defined in RFC 2045.
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
to the canonical email line separator \\r\\n unless the keep_eols
parameter is True (the default is False).
Each line of the header will be terminated in the value of eol, which
defaults to "\\n". Set this to "\\r\\n" if you are using the result of
this function directly in email.
The resulting string will be in the form:
"=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
=?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
with each line wrapped at, at most, maxlinelen characters (defaults to 76
characters).
""" """
# Return empty headers unchanged # Return empty headers unchanged
if not header: if not header_bytes:
return header return str(header_bytes)
encoded = b64encode(header_bytes)
if not keep_eols: return '=?%s?b?%s?=' % (charset, encoded)
header = fix_eols(header)
# Base64 encode each line, in encoded chunks no greater than maxlinelen in
# length, after the RFC chrome is added in.
base64ed = []
max_encoded = maxlinelen - len(charset) - MISC_LEN
max_unencoded = max_encoded * 3 // 4
for i in range(0, len(header), max_unencoded):
base64ed.append(b2a_base64(header[i:i+max_unencoded]))
# Now add the RFC chrome to each encoded chunk
lines = []
for line in base64ed:
# Ignore the last character of each line if it is a newline
if line[-1] == ord(NL):
line = line[:-1]
# Add the chrome
lines.append('=?%s?b?%s?=' % (charset, line))
# Glue the lines together and return it. BAW: should we be able to
# specify the leading whitespace in the joiner?
joiner = eol + ' '
return joiner.join(lines)
def encode(s, binary=True, maxlinelen=76, eol=NL): def body_encode(s, maxlinelen=76, eol=NL):
"""Encode a string with base64. """Encode a string with base64.
Each line will be wrapped at, at most, maxlinelen characters (defaults to Each line will be wrapped at, at most, maxlinelen characters (defaults to
76 characters). 76 characters).
If binary is False, end-of-line characters will be converted to the
canonical email end-of-line sequence \\r\\n. Otherwise they will be left
verbatim (this is the default).
Each line of encoded text will end with eol, which defaults to "\\n". Set Each line of encoded text will end with eol, which defaults to "\\n". Set
this to "\r\n" if you will be using the result of this function directly this to "\r\n" if you will be using the result of this function directly
in an email. in an email.
...@@ -137,9 +88,6 @@ def encode(s, binary=True, maxlinelen=76, eol=NL): ...@@ -137,9 +88,6 @@ def encode(s, binary=True, maxlinelen=76, eol=NL):
if not s: if not s:
return s return s
if not binary:
s = fix_eols(s)
encvec = [] encvec = []
max_unencoded = maxlinelen * 3 // 4 max_unencoded = maxlinelen * 3 // 4
for i in range(0, len(s), max_unencoded): for i in range(0, len(s), max_unencoded):
...@@ -152,25 +100,26 @@ def encode(s, binary=True, maxlinelen=76, eol=NL): ...@@ -152,25 +100,26 @@ def encode(s, binary=True, maxlinelen=76, eol=NL):
return EMPTYSTRING.join(encvec) return EMPTYSTRING.join(encvec)
# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
def decode(string): def decode(s, convert_eols=False):
"""Decode a raw base64 string, returning a bytes object. """Decode a raw base64 string, returning a bytes object.
This function does not parse a full MIME header value encoded with base64 If convert_eols is set to a string value, all canonical email linefeeds,
(like =?iso-8895-1?b?bmloISBuaWgh?=) -- use the high level e.g. "\\r\\n", in the decoded text will be converted to the value of
email.Header class for that functionality. convert_eols. os.linesep is a good choice for convert_eols if you are
decoding a text attachment.
This function does not parse a full MIME header value encoded with
base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
level email.Header class for that functionality.
""" """
if not string: if not s:
return bytes() return s
elif isinstance(string, str):
return a2b_base64(string.encode('raw-unicode-escape')) dec = a2b_base64(s)
else: if convert_eols:
return a2b_base64(string) return dec.replace(CRLF, convert_eols)
return dec
# For convenience and backwards compatibility w/ standard base64 module # For convenience and backwards compatibility w/ standard base64 module
......
This diff is collapsed.
...@@ -133,12 +133,8 @@ class Generator: ...@@ -133,12 +133,8 @@ class Generator:
def _write_headers(self, msg): def _write_headers(self, msg):
for h, v in msg.items(): for h, v in msg.items():
print('%s:' % h, end=' ', file=self._fp) print('%s:' % h, end=' ', file=self._fp)
if self._maxheaderlen == 0: if isinstance(v, Header):
# Explicit no-wrapping print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
print(v, file=self._fp)
elif isinstance(v, Header):
# Header instances know what to do
print(v.encode(), file=self._fp)
else: else:
# Header's got lots of smarts, so use it. # Header's got lots of smarts, so use it.
header = Header(v, maxlinelen=self._maxheaderlen, header = Header(v, maxlinelen=self._maxheaderlen,
......
This diff is collapsed.
...@@ -13,9 +13,9 @@ import warnings ...@@ -13,9 +13,9 @@ import warnings
from io import BytesIO, StringIO from io import BytesIO, StringIO
# Intrapackage imports # Intrapackage imports
import email.charset
from email import utils from email import utils
from email import errors from email import errors
from email.charset import Charset
SEMISPACE = '; ' SEMISPACE = '; '
...@@ -201,7 +201,7 @@ class Message: ...@@ -201,7 +201,7 @@ class Message:
# Incorrect padding # Incorrect padding
pass pass
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
in_file = BytesIO((payload + '\n').encode('raw-unicode-escape')) in_file = BytesIO(bytes(payload + '\n'))
out_file = BytesIO() out_file = BytesIO()
try: try:
uu.decode(in_file, out_file, quiet=True) uu.decode(in_file, out_file, quiet=True)
...@@ -211,7 +211,7 @@ class Message: ...@@ -211,7 +211,7 @@ class Message:
pass pass
# Is there a better way to do this? We can't use the bytes # Is there a better way to do this? We can't use the bytes
# constructor. # constructor.
return bytes(ord(c) for c in payload) return bytes(payload, 'raw-unicode-escape')
def set_payload(self, payload, charset=None): def set_payload(self, payload, charset=None):
"""Set the payload to the given value. """Set the payload to the given value.
...@@ -236,18 +236,13 @@ class Message: ...@@ -236,18 +236,13 @@ class Message:
and encoded properly, if needed, when generating the plain text and encoded properly, if needed, when generating the plain text
representation of the message. MIME headers (MIME-Version, representation of the message. MIME headers (MIME-Version,
Content-Type, Content-Transfer-Encoding) will be added as needed. Content-Type, Content-Transfer-Encoding) will be added as needed.
""" """
if charset is None: if charset is None:
self.del_param('charset') self.del_param('charset')
self._charset = None self._charset = None
return return
if isinstance(charset, basestring): if not isinstance(charset, Charset):
charset = email.charset.Charset(charset) charset = Charset(charset)
if not isinstance(charset, email.charset.Charset):
raise TypeError(charset)
# BAW: should we accept strings that can serve as arguments to the
# Charset constructor?
self._charset = charset self._charset = charset
if 'MIME-Version' not in self: if 'MIME-Version' not in self:
self.add_header('MIME-Version', '1.0') self.add_header('MIME-Version', '1.0')
...@@ -256,7 +251,7 @@ class Message: ...@@ -256,7 +251,7 @@ class Message:
charset=charset.get_output_charset()) charset=charset.get_output_charset())
else: else:
self.set_param('charset', charset.get_output_charset()) self.set_param('charset', charset.get_output_charset())
if str(charset) != charset.get_output_charset(): if charset != charset.get_output_charset():
self._payload = charset.body_encode(self._payload) self._payload = charset.body_encode(self._payload)
if 'Content-Transfer-Encoding' not in self: if 'Content-Transfer-Encoding' not in self:
cte = charset.get_body_encoding() cte = charset.get_body_encoding()
...@@ -757,8 +752,7 @@ class Message: ...@@ -757,8 +752,7 @@ class Message:
# LookupError will be raised if the charset isn't known to # LookupError will be raised if the charset isn't known to
# Python. UnicodeError will be raised if the encoded text # Python. UnicodeError will be raised if the encoded text
# contains a character not in the charset. # contains a character not in the charset.
as_bytes = charset[2].encode('raw-unicode-escape') charset = str(bytes(charset[2]), pcharset)
charset = str(as_bytes, pcharset)
except (LookupError, UnicodeError): except (LookupError, UnicodeError):
charset = charset[2] charset = charset[2]
# charset characters must be in us-ascii range # charset characters must be in us-ascii range
......
...@@ -29,16 +29,14 @@ wrapping issues, use the email.Header module. ...@@ -29,16 +29,14 @@ wrapping issues, use the email.Header module.
__all__ = [ __all__ = [
'body_decode', 'body_decode',
'body_encode', 'body_encode',
'body_quopri_check', 'body_length',
'body_quopri_len',
'decode', 'decode',
'decodestring', 'decodestring',
'encode', 'encode',
'encodestring', 'encodestring',
'header_decode', 'header_decode',
'header_encode', 'header_encode',
'header_quopri_check', 'header_length',
'header_quopri_len',
'quote', 'quote',
'unquote', 'unquote',
] ]
...@@ -46,54 +44,65 @@ __all__ = [ ...@@ -46,54 +44,65 @@ __all__ = [
import re import re
from string import ascii_letters, digits, hexdigits from string import ascii_letters, digits, hexdigits
from email.utils import fix_eols
CRLF = '\r\n' CRLF = '\r\n'
NL = '\n' NL = '\n'
EMPTYSTRING = '' EMPTYSTRING = ''
# See also Charset.py # Build a mapping of octets to the expansion of that octet. Since we're only
MISC_LEN = 7 # going to have 256 of these things, this isn't terribly inefficient
# space-wise. Remember that headers and bodies have different sets of safe
# characters. Initialize both maps with the full expansion, and then override
# the safe bytes with the more compact form.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
HEADER_SAFE_BYTES = (b'-!*+/ ' + # Safe header bytes which need no encoding.
ascii_letters.encode('raw-unicode-escape') + for c in b'-!*+/' + bytes(ascii_letters) + bytes(digits):
digits.encode('raw-unicode-escape')) _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'
BODY_SAFE_BYTES = (b' !"#$%&\'()*+,-./0123456789:;<>' # Safe body bytes which need no encoding.
b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
b'abcdefghijklmnopqrstuvwxyz{|}~\t') b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
_QUOPRI_BODY_MAP[c] = chr(c)
# Helpers # Helpers
def header_quopri_check(c): def header_check(octet):
"""Return True if the character should be escaped with header quopri.""" """Return True if the octet should be escaped with header quopri."""
return c not in HEADER_SAFE_BYTES return chr(octet) != _QUOPRI_HEADER_MAP[octet]
def body_quopri_check(c): def body_check(octet):
"""Return True if the character should be escaped with body quopri.""" """Return True if the octet should be escaped with body quopri."""
return c not in BODY_SAFE_BYTES return chr(octet) != _QUOPRI_BODY_MAP[octet]
def header_quopri_len(bytearray): def header_length(bytearray):
"""Return the length of bytearray when it is encoded with header quopri. """Return a header quoted-printable encoding length.
Note that this does not include any RFC 2047 chrome added by Note that this does not include any RFC 2047 chrome added by
`header_encode()`. `header_encode()`.
:param bytearray: An array of bytes (a.k.a. octets).
:return: The length in bytes of the byte array when it is encoded with
quoted-printable for headers.
""" """
count = 0 return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
for c in bytearray:
count += (3 if header_quopri_check(c) else 1)
return count
def body_length(bytearray):
"""Return a body quoted-printable encoding length.
def body_quopri_len(bytearray): :param bytearray: An array of bytes (a.k.a. octets).
"""Return the length of bytearray when it is encoded with body quopri.""" :return: The length in bytes of the byte array when it is encoded with
count = 0 quoted-printable for bodies.
for c in bytearray: """
count += (3 if body_quopri_check(c) else 1) return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
return count
def _max_append(L, s, maxlen, extra=''): def _max_append(L, s, maxlen, extra=''):
...@@ -133,29 +142,17 @@ def header_encode(header_bytes, charset='iso-8859-1'): ...@@ -133,29 +142,17 @@ def header_encode(header_bytes, charset='iso-8859-1'):
return str(header_bytes) return str(header_bytes)
# Iterate over every byte, encoding if necessary. # Iterate over every byte, encoding if necessary.
encoded = [] encoded = []
for character in header_bytes: for octet in header_bytes:
# Space may be represented as _ instead of =20 for readability encoded.append(_QUOPRI_HEADER_MAP[octet])
if character == ord(' '):
encoded.append('_')
# These characters can be included verbatim.
elif not header_quopri_check(character):
encoded.append(chr(character))
# Otherwise, replace with hex value like =E2
else:
encoded.append('=%02X' % character)
# Now add the RFC chrome to each encoded chunk and glue the chunks # Now add the RFC chrome to each encoded chunk and glue the chunks
# together. # together.
return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
def encode(body, binary=False, maxlinelen=76, eol=NL): def body_encode(body, maxlinelen=76, eol=NL):
"""Encode with quoted-printable, wrapping at maxlinelen characters. """Encode with quoted-printable, wrapping at maxlinelen characters.
If binary is False (the default), end-of-line characters will be converted
to the canonical email end-of-line sequence \\r\\n. Otherwise they will
be left verbatim.
Each line of encoded text will end with eol, which defaults to "\\n". Set Each line of encoded text will end with eol, which defaults to "\\n". Set
this to "\\r\\n" if you will be using the result of this function directly this to "\\r\\n" if you will be using the result of this function directly
in an email. in an email.
...@@ -168,9 +165,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL): ...@@ -168,9 +165,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
if not body: if not body:
return body return body
if not binary:
body = fix_eols(body)
# BAW: We're accumulating the body text by string concatenation. That # BAW: We're accumulating the body text by string concatenation. That
# can't be very efficient, but I don't have time now to rewrite it. It # can't be very efficient, but I don't have time now to rewrite it. It
# just feels like this algorithm could be more efficient. # just feels like this algorithm could be more efficient.
...@@ -195,7 +189,7 @@ def encode(body, binary=False, maxlinelen=76, eol=NL): ...@@ -195,7 +189,7 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
for j in range(linelen): for j in range(linelen):
c = line[j] c = line[j]
prev = c prev = c
if body_quopri_check(c): if body_check(c):
c = quote(c) c = quote(c)
elif j+1 == linelen: elif j+1 == linelen:
# Check for whitespace at end of line; special case # Check for whitespace at end of line; special case
...@@ -231,11 +225,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL): ...@@ -231,11 +225,6 @@ def encode(body, binary=False, maxlinelen=76, eol=NL):
return encoded_body return encoded_body
# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
# BAW: I'm not sure if the intent was for the signature of this function to be # BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not... # the same as base64MIME.decode() or not...
......
This diff is collapsed.
...@@ -71,16 +71,6 @@ def _bdecode(s): ...@@ -71,16 +71,6 @@ def _bdecode(s):
def fix_eols(s):
"""Replace all line-ending characters with \r\n."""
# Fix newlines with no preceding carriage return
s = re.sub(r'(?<!\r)\n', CRLF, s)
# Fix carriage returns with no following newline
s = re.sub(r'\r(?!\n)', CRLF, s)
return s
def formataddr(pair): def formataddr(pair):
"""The inverse of parseaddr(), this takes a 2-tuple of the form """The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable (realname, email_address) and returns the string value suitable
...@@ -317,7 +307,7 @@ def collapse_rfc2231_value(value, errors='replace', ...@@ -317,7 +307,7 @@ def collapse_rfc2231_value(value, errors='replace',
# object. We do not want bytes() normal utf-8 decoder, we want a straight # object. We do not want bytes() normal utf-8 decoder, we want a straight
# interpretation of the string as character bytes. # interpretation of the string as character bytes.
charset, language, text = value charset, language, text = value
rawbytes = bytes(ord(c) for c in text) rawbytes = bytes(text, 'raw-unicode-escape')
try: try:
return str(rawbytes, charset, errors) return str(rawbytes, charset, errors)
except LookupError: except LookupError:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment