Utils.py 9.9 KB
Newer Older
1
# Copyright (C) 2001,2002 Python Software Foundation
2 3 4 5 6
# Author: barry@zope.com (Barry Warsaw)

"""Miscellaneous utilities.
"""

Barry Warsaw's avatar
Barry Warsaw committed
7
import time
8
import socket
9
import re
10 11 12 13 14
import random
import os
import warnings
from cStringIO import StringIO
from types import ListType
15

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
from rfc822 import quote
from rfc822 import AddressList as _AddressList
from rfc822 import mktime_tz

# We need workarounds for bugs in these methods in older Pythons (see below)
from rfc822 import parsedate as _parsedate
from rfc822 import parsedate_tz as _parsedate_tz

# Compatibility shim: if the names True and False are not defined by the
# running interpreter (NameError), bind them to the integers 1 and 0.
try:
    True, False
except NameError:
    True = 1
    False = 0

try:
    from quopri import decodestring as _qdecode
except ImportError:
    # Python 2.1 doesn't have quopri.decodestring(), so emulate it on top of
    # the file-based quopri.decode() using in-memory file objects.
    def _qdecode(s):
        """Decode the quoted-printable string s and return the result.

        A false value is returned unchanged.  A trailing newline present in
        the decoded output but absent from the input is removed.
        """
        import quopri as _quopri

        if not s:
            return s
        source = StringIO(s)
        sink = StringIO()
        _quopri.decode(source, sink)
        decoded = sink.getvalue()
        if decoded.endswith('\n') and not s.endswith('\n'):
            decoded = decoded[:-1]
        return decoded
46 47 48 49

import base64

# Intrapackage imports
50
from email.Encoders import _bencode, _qencode
51 52

# Frequently used string constants.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# Characters whose presence in a display name forces the name to be wrapped
# in double quotes.  The backslash is written as '\\' so that it is itself a
# member of the set; a lone '\(' would merely be an escaped parenthesis and
# leave the backslash out.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that get a backslash put in front of them when a display name is
# formatted.  The backslash itself must be included, otherwise a name
# containing one would be emitted with an unintended escape sequence.
escapesre = re.compile(r'[][\\()"]')
59 60


61

62 63 64 65 66 67 68 69 70 71 72 73 74 75
# Helpers

def _identity(s):
    """Return s unchanged (a no-op transformation)."""
    return s


def _bdecode(s):
    """Decode the base64-encoded string s and return the raw payload.

    A false value (None or the empty string) is returned unchanged.
    """
    if not s:
        return s
    # base64.decodestring() is called decodebytes() on newer Pythons, where
    # the old name is eventually missing; use whichever one is available.
    decoder = getattr(base64, 'decodebytes', None) or base64.decodestring
    # The decoded payload is returned untouched.  Decoding never appends a
    # "courtesy newline" (only encoding does), so removing a trailing '\n'
    # here would silently corrupt data whose last byte really is a newline.
    return decoder(s)


81

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
def fix_eols(s):
    r"""Return s with every line ending normalized to CRLF (\r\n).

    A bare \n (one not preceded by \r) and a bare \r (one not followed by
    \n) are each replaced by the CRLF sequence.
    """
    # First pass: newlines that lack a preceding carriage return.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: carriage returns that lack a following newline.
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)



def formataddr(pair):
    """Build a header-ready address string from (realname, email_address).

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    realname, addr = pair
    if not realname:
        return addr
    # Wrap the name in double quotes only when it contains a character
    # matched by specialsre.
    if specialsre.search(realname):
        delim = '"'
    else:
        delim = ''
    # Characters matched by escapesre are backslash-escaped either way.
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (delim, escaped, delim, addr)

# For backwards compatibility
def dump_address_pair(pair):
    """Deprecated alias for formataddr().

    Issues a DeprecationWarning attributed to the caller, then returns
    formataddr(pair).
    """
    warnings.warn(
        'Use email.Utils.formataddr() instead', DeprecationWarning, 2)
    formatted = formataddr(pair)
    return formatted



117 118 119
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    fieldvalues is a sequence of header value strings; they are joined with
    ', ' and parsed together as one address list.
    """
    # Named 'joined' rather than 'all' so the builtin is not shadowed.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
122 123


124

125 126 127 128 129 130 131 132 133 134 135 136
# Pattern for one encoded-word of the form =?charset?encoding?atom?= (the
# syntax handled by decode() below).  The three fields are captured as the
# named groups 'charset', 'encoding' and 'atom'; matching is case insensitive.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


def decode(s):
    """Return a decoded string according to RFC 2047, as a unicode string.

    NOTE: This function is deprecated.  Use Header.decode_header() instead.

    If decode_header() does not return a list, s is returned unchanged.
    Otherwise each (atom, charset) pair is converted and the pieces are
    joined into a single unicode string.  Conversion errors are not caught.
    """
    warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
    # Intra-package import here to avoid circular import problems.
    from email.Header import decode_header
    parts = decode_header(s)
    if not isinstance(parts, ListType):
        # s wasn't decoded
        return s

    rtn = []
    for atom, charset in parts:
        if charset is None:
            # No charset given for this piece; keep it as it is.
            rtn.append(atom)
        else:
            # Convert the string to Unicode using the given encoding.  Leave
            # Unicode conversion errors to strict.
            rtn.append(unicode(atom, charset))
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)


162

163 164
def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047.

    NOTE: This function is deprecated.  Use Header.Header.encode() instead.

    encoding selects the transfer encoding, case insensitively: 'q' uses
    _qencode() and 'b' uses _bencode().  The result has the form
    =?charset?encoding?text?= with charset and encoding lower-cased.

    Raises ValueError for any other encoding code.
    """
    warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
    encoding = encoding.lower()
    if encoding == 'q':
        estr = _qencode(s)
    elif encoding == 'b':
        estr = _bencode(s)
    else:
        # Call form of raise: accepted by both old and new Python syntax.
        raise ValueError('Illegal encoding code: ' + encoding)
    return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr)
174 175 176



177
def formatdate(timeval=None, localtime=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # Split the absolute offset (in seconds) into whole hours and whole
        # minutes.  divmod() keeps the arithmetic integral regardless of how
        # the '/' operator behaves.
        hours, remainder = divmod(abs(offset), 3600)
        minutes = divmod(remainder, 60)[0]
        # Remember offset is in seconds west of UTC, but the timezone is in
        # minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, minutes)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        zone = '-0000'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
        now[2],
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340



def make_msgid(idstring=None):
    """Return a string usable as an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a random
    integer below 100000 and the fully qualified host name.  Optional
    idstring, if given, is appended (after a dot) to the local part to
    strengthen the uniqueness of the message id.
    """
    now = time.time()
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(now))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)



# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to worm
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with rfc822.parsedate().

    Returns None without calling the parser when data is false (for
    example None or the empty string).
    """
    if data:
        return _parsedate(data)
    return None


def parsedate_tz(data):
    """Parse a date string with rfc822.parsedate_tz().

    Returns None without calling the parser when data is false (for
    example None or the empty string).
    """
    if data:
        return _parsedate_tz(data)
    return None


def parseaddr(addr):
    """Return the first entry of the address list parsed from addr.

    If parsing yields no addresses, a pair of empty strings is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''


# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters enclosed in double quotes is
    returned without them, with backslash-escaped backslashes and double
    quotes unescaped.  A string enclosed in angle brackets is returned
    without the brackets.  Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str



# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is expected to look like charset'language'value, where value is
    percent-encoded.  Returns the 3-tuple (charset, language, value) with
    value percent-decoded.

    If s does not contain the two apostrophe separators it carries no
    charset/language information; in that case (None, None, s) is returned
    with s left untouched instead of raising ValueError.
    """
    # The percent-decoding helper lives in urllib on older Pythons and in
    # urllib.parse on newer ones.
    try:
        from urllib import unquote as url_unquote
    except ImportError:
        from urllib.parse import unquote as url_unquote
    parts = s.split("'", 2)
    if len(parts) < 3:
        return None, None, s
    charset, language, encoded = parts
    return charset, language, url_unquote(encoded)


def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted, with no character treated as safe.  If
    neither charset nor language is given, the quoted string is returned on
    its own.  Otherwise the result has the form charset'language'quoted,
    where a missing charset or language is written as the empty string.
    """
    # The percent-encoding helper lives in urllib on older Pythons and in
    # urllib.parse on newer ones.
    try:
        from urllib import quote as url_quote
    except ImportError:
        from urllib.parse import quote as url_quote
    s = url_quote(s, safe='')
    if charset is None and language is None:
        return s
    # Substitute '' for a missing field so that the literal text "None"
    # never ends up in the encoded value.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)


# Matches a parameter name of the form NAME*, NAME*N or NAME*N* (N being a
# run of digits), capturing NAME as group 'name' and N, when present, as
# group 'num'.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (content type, string value).

    Returns a new list.  The first entry of params is copied through
    unchanged.  Every later value is unquoted; entries whose name matches
    rfc2231_continuation are grouped by base name, ordered by section
    number, joined, passed through decode_rfc2231() and appended as
    (name, (charset, language, quoted_value)).  All other entries are
    appended as (name, quoted_value).  An empty params yields an empty list.
    """
    new_params = []
    if not params:
        # Nothing to decode; avoid indexing into an empty sequence.
        return new_params
    # maps parameter's name to a list of continuations
    rfc2231_params = {}
    # The first entry is passed through as it is.
    name, value = params[0]
    new_params.append((name, value))
    # Cycle through each of the rest of the parameters.
    for name, value in params[1:]:
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        # Sort by number, then join all values in num order.
        continuations.sort()
        pieces = [piece for num, piece in continuations]
        charset, language, value = decode_rfc2231(EMPTYSTRING.join(pieces))
        new_params.append((name,
                           (charset, language, '"%s"' % quote(value))))
    return new_params