Commit d92ae78b, authored by Barry Warsaw

Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0.  Specifically, in Message.get_content_charset(), handle RFC
2231 headers that contain an encoding not known to Python, or a character in
the data that isn't in the charset encoding.  Also forward port the
appropriate unit tests.
parent 9815f8b2
......@@ -747,7 +747,18 @@ class Message:
if isinstance(charset, tuple):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
pcharset = charset[0] or 'us-ascii'
charset = unicode(charset[2], pcharset).encode('us-ascii')
try:
# LookupError will be raised if the charset isn't known to
# Python. UnicodeError will be raised if the encoded text
# contains a character not in the charset.
charset = unicode(charset[2], pcharset).encode('us-ascii')
except (LookupError, UnicodeError):
charset = charset[2]
# charset character must be in us-ascii range
try:
charset = unicode(charset, 'us-ascii').encode('us-ascii')
except UnicodeError:
return failobj
# RFC 2046, $4.1.2 says charsets are not case sensitive
return charset.lower()
......
......@@ -3086,6 +3086,50 @@ Content-Type: text/plain;
self.assertEqual(msg.get_content_charset(),
'this is even more ***fun*** is it not.pdf')
def test_rfc2231_bad_encoding_in_filename(self):
    # The first RFC 2231 segment declares the charset 'bogus'.  The test
    # expects the percent-decoded, concatenated filename to come back anyway.
    raw_headers = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"
'''
    parsed = email.message_from_string(raw_headers)
    expected = 'This is even more ***fun*** is it not.pdf'
    self.assertEqual(parsed.get_filename(), expected)
def test_rfc2231_bad_encoding_in_charset(self):
    # The charset parameter is RFC 2231 encoded, declares the encoding
    # 'bogus', and its value ends in percent-encoded non-ASCII bytes.
    raw_headers = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
"""
    parsed = email.message_from_string(raw_headers)
    # None is expected because non-ascii characters in the charset are not
    # allowed.
    found = parsed.get_content_charset()
    self.assertEqual(found, None)
def test_rfc2231_bad_character_in_charset(self):
    # The charset parameter is RFC 2231 encoded, declares 'ascii', and its
    # value ends in percent-encoded bytes outside the ASCII range.
    raw_headers = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
"""
    parsed = email.message_from_string(raw_headers)
    # None is expected because non-ascii characters in the charset are not
    # allowed.
    found = parsed.get_content_charset()
    self.assertEqual(found, None)
def test_rfc2231_bad_character_in_filename(self):
    # The segments declare 'ascii', but the last one ends in %E2, a byte
    # outside that charset.  The expected filename ends in U+FFFD.
    raw_headers = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"
'''
    parsed = email.message_from_string(raw_headers)
    expected = u'This is even more ***fun*** is it not.pdf\ufffd'
    self.assertEqual(parsed.get_filename(), expected)
def test_rfc2231_unknown_encoding(self):
m = """\
Content-Transfer-Encoding: 8bit
......
......@@ -3092,6 +3092,50 @@ Content-Type: text/plain;
self.assertEqual(msg.get_content_charset(),
'this is even more ***fun*** is it not.pdf')
def test_rfc2231_bad_encoding_in_filename(self):
    # The first RFC 2231 segment declares the charset 'bogus'.  The test
    # expects the percent-decoded, concatenated filename to come back anyway.
    header_text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"
'''
    message = email.message_from_string(header_text)
    wanted = 'This is even more ***fun*** is it not.pdf'
    self.assertEqual(message.get_filename(), wanted)
def test_rfc2231_bad_encoding_in_charset(self):
    # The charset parameter is RFC 2231 encoded, declares the encoding
    # 'bogus', and its value ends in percent-encoded non-ASCII bytes.
    header_text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
"""
    message = email.message_from_string(header_text)
    # None is expected because non-ascii characters in the charset are not
    # allowed.
    result = message.get_content_charset()
    self.assertEqual(result, None)
def test_rfc2231_bad_character_in_charset(self):
    # The charset parameter is RFC 2231 encoded, declares 'ascii', and its
    # value ends in percent-encoded bytes outside the ASCII range.
    header_text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
"""
    message = email.message_from_string(header_text)
    # None is expected because non-ascii characters in the charset are not
    # allowed.
    result = message.get_content_charset()
    self.assertEqual(result, None)
def test_rfc2231_bad_character_in_filename(self):
    # The segments declare 'ascii', but the last one ends in %E2, a byte
    # outside that charset.  The expected filename ends in U+FFFD.
    header_text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"
'''
    message = email.message_from_string(header_text)
    wanted = u'This is even more ***fun*** is it not.pdf\ufffd'
    self.assertEqual(message.get_filename(), wanted)
def test_rfc2231_unknown_encoding(self):
m = """\
Content-Transfer-Encoding: 8bit
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment