Kaydet (Commit) 905c8c3d authored tarafından R David Murray's avatar R David Murray

#19772: Do not mutate message when downcoding to 7bit.

This is a bit of an ugly hack because of the way the generator pieces together
the output message.  The deepcopy calls aren't too expensive, though, because we
know they are only made on messages that are not multiparts, and the payload
(the thing that could be large) is an immutable object.

Test and preliminary work on patch by Vajrasky Kok.
üst 7c389e24
...@@ -12,6 +12,7 @@ import time ...@@ -12,6 +12,7 @@ import time
import random import random
import warnings import warnings
from copy import deepcopy
from io import StringIO, BytesIO from io import StringIO, BytesIO
from email._policybase import compat32 from email._policybase import compat32
from email.header import Header from email.header import Header
...@@ -173,10 +174,18 @@ class Generator: ...@@ -173,10 +174,18 @@ class Generator:
# necessary. # necessary.
oldfp = self._fp oldfp = self._fp
try: try:
self._munge_cte = None
self._fp = sfp = self._new_buffer() self._fp = sfp = self._new_buffer()
self._dispatch(msg) self._dispatch(msg)
finally: finally:
self._fp = oldfp self._fp = oldfp
munge_cte = self._munge_cte
del self._munge_cte
# If we munged the cte, copy the message again and re-fix the CTE.
if munge_cte:
msg = deepcopy(msg)
msg.replace_header('content-transfer-encoding', munge_cte[0])
msg.replace_header('content-type', munge_cte[1])
# Write the headers. First we see if the message object wants to # Write the headers. First we see if the message object wants to
# handle that itself. If not, we'll do it generically. # handle that itself. If not, we'll do it generically.
meth = getattr(msg, '_write_headers', None) meth = getattr(msg, '_write_headers', None)
...@@ -225,9 +234,14 @@ class Generator: ...@@ -225,9 +234,14 @@ class Generator:
if _has_surrogates(msg._payload): if _has_surrogates(msg._payload):
charset = msg.get_param('charset') charset = msg.get_param('charset')
if charset is not None: if charset is not None:
# XXX: This copy stuff is an ugly hack to avoid modifying the
# existing message.
msg = deepcopy(msg)
del msg['content-transfer-encoding'] del msg['content-transfer-encoding']
msg.set_payload(payload, charset) msg.set_payload(payload, charset)
payload = msg.get_payload() payload = msg.get_payload()
self._munge_cte = (msg['content-transfer-encoding'],
msg['content-type'])
if self._mangle_from_: if self._mangle_from_:
payload = fcre.sub('>From ', payload) payload = fcre.sub('>From ', payload)
self._write_lines(payload) self._write_lines(payload)
......
...@@ -3495,7 +3495,7 @@ Here's the message body ...@@ -3495,7 +3495,7 @@ Here's the message body
self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
class Test8BitBytesHandling(unittest.TestCase): class Test8BitBytesHandling(TestEmailBase):
# In Python3 all input is string, but that doesn't work if the actual input # In Python3 all input is string, but that doesn't work if the actual input
# uses an 8bit transfer encoding. To hack around that, in email 5.1 we # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
# decode byte streams using the surrogateescape error handler, and # decode byte streams using the surrogateescape error handler, and
...@@ -3748,6 +3748,16 @@ class Test8BitBytesHandling(unittest.TestCase): ...@@ -3748,6 +3748,16 @@ class Test8BitBytesHandling(unittest.TestCase):
email.generator.Generator(out).flatten(msg) email.generator.Generator(out).flatten(msg)
self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
# Regression test for issue #19772: flattening a message with the str
# Generator must leave the message object itself unchanged.
msg = email.message_from_bytes(self.non_latin_bin_msg)
# Record the bytes serialization before the str Generator touches msg.
out = BytesIO()
BytesGenerator(out).flatten(msg)
orig_value = out.getvalue()
Generator(StringIO()).flatten(msg) # Should not mutate msg!
# Serialize again into a fresh buffer; any difference from the first pass
# means the str Generator's flatten() altered msg.
out = BytesIO()
BytesGenerator(out).flatten(msg)
self.assertEqual(out.getvalue(), orig_value)
def test_bytes_generator_with_unix_from(self): def test_bytes_generator_with_unix_from(self):
# The unixfrom contains a current date, so we can't check it # The unixfrom contains a current date, so we can't check it
# literally. Just make sure the first word is 'From' and the # literally. Just make sure the first word is 'From' and the
......
...@@ -48,6 +48,9 @@ Core and Builtins ...@@ -48,6 +48,9 @@ Core and Builtins
Library Library
------- -------
- Issue #19772: email.generator no longer mutates the message object when
doing a down-transform from 8bit to 7bit CTEs.
- Issue #18805: the netmask/hostmask parsing in ipaddress now more reliably - Issue #18805: the netmask/hostmask parsing in ipaddress now more reliably
filters out illegal values and correctly allows any valid prefix length. filters out illegal values and correctly allows any valid prefix length.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment