#!/usr/bin/env python3
"""Classes to parse mailer-daemon messages."""

import calendar
import email.message
import email.utils
import io
import os
import re
import sys

10 11 12 13

class Unparseable(Exception):
    """Raised when no address can be extracted from a bounce message."""

14

Georg Brandl's avatar
Georg Brandl committed
15 16 17
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can classify and dissect itself.

    ``sub`` is '' initially; is_warning() stores the lower-cased Subject
    there when the message turns out not to be a warning, and get_errors()
    hands it to the parsers as a fallback error reason.
    """

    def __init__(self):
        email.message.Message.__init__(self)
        self.sub = ''

    def is_warning(self):
        """Return True if the Subject marks this message as a mere warning.

        A message counts as a warning when its lower-cased Subject starts
        with 'waiting mail' or contains 'warning'.  A missing or empty
        Subject yields False and leaves ``self.sub`` untouched.
        """
        sub = self.get('Subject')
        if not sub:
            return False
        # str() guards against a non-str header object being returned.
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return True
        self.sub = sub
        return False

    def _body_text(self):
        """Return the message body (everything after the headers) as text."""
        payload = self.get_payload()
        if payload is None:
            return ''
        if isinstance(payload, list):
            # Multipart: flatten the sub-parts, in order, back into text so
            # the regex-based parsers can scan all of them.
            return '\n'.join(part.as_string() for part in payload)
        return str(payload)

    def get_errors(self):
        """Return the list of error strings found by the first parser that works.

        Each parser in EMPARSERS is called with a fresh file-like object
        positioned at the start of the body, plus ``self.sub``.

        Raises:
            Unparseable: if every parser raises Unparseable.
        """
        # email.message.Message has neither rewindbody() nor an fp attribute,
        # so give every parser its own in-memory stream over the body.
        body = self._body_text()
        for parser in EMPARSERS:
            try:
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
40

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
# Patterns that locate the failing address(es) in a bounce message.
#
# Each entry is either a single pattern or a 2-tuple of patterns:
#   * A single pattern must define a (?P<email>...) group capturing the
#     address and may define a (?P<reason>...) group capturing the error
#     text.  When no reason is captured, emparse_list_reason is consulted
#     to find one.
#   * For a tuple, the first pattern is searched for to find a starting
#     location; the second is then matched (not searched) one or more
#     times, each time where the previous match ended, to collect
#     multiple addresses.
# The strings are compiled with the re module right after the literal.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Swap every source string for its compiled form, keeping the same list
# object: plain strings become patterns, tuples become tuples of patterns.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]

# Patterns used to find a reason (error message) when the address pattern
# did not capture one.
# A plain string is a template: "<>" is replaced by a copy of the email
# address before it is compiled.  A compiled pattern is used as is.
# The expressions are searched for in order.  After the first compiled
# pattern matches, no more expressions are searched for.  So, order is
# important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    # Raw string: '\.' is not a valid escape in an ordinary string literal.
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Finds the first "From:" line (any case) in a message body.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Extract "address: reason" strings from a bounce message body.

    Args:
        fp: object whose read() returns the message body as text.
        sub: subject text used as the reason when the body supplies none;
            a leading 'returned mail: ' is stripped from it.

    Returns:
        A list of strings of the form "address: reason" with runs of
        whitespace collapsed to single spaces.

    Raises:
        Unparseable: if no pattern in emparse_list_list yields an address.
    """
    data = fp.read()
    # Address patterns only look at the text before the first "From:" line.
    from_match = emparse_list_from.search(data)
    from_index = len(data) if from_match is None else from_match.start(0)

    errors = []
    emails = []
    reason = None
    # The first pattern that matches decides the outcome; later ones are
    # not tried.
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            locator, address_re = regexp[0], regexp[1]
            match = locator.search(data, 0, from_index)
            if match is None:
                continue
            if 'reason' in locator.groupindex:
                reason = match.group('reason')
            # Collect one address per consecutive match, each attempt
            # starting where the previous match ended.
            pos = match.end(0)
            while True:
                match = address_re.match(data, pos, from_index)
                if match is None:
                    break
                emails.append(match.group('email'))
                pos = match.end(0)
            break
        match = regexp.search(data, 0, from_index)
        if match is not None:
            emails.append(match.group('email'))
            if 'reason' in regexp.groupindex:
                reason = match.group('reason')
            break
    if not emails:
        raise Unparseable

    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if isinstance(regexp, str):
                # Template: look for a reason specific to each address.
                # Addresses are visited last-to-first; those that get their
                # own reason are reported now and dropped from `emails`.
                unexplained = []
                for addr in reversed(emails):
                    pattern = re.compile(
                        re.escape(addr).join(regexp.split('<>')), re.MULTILINE)
                    match = pattern.search(data)
                    if match is None:
                        unexplained.append(addr)
                    else:
                        errors.append(' '.join(
                            (addr.strip() + ': ' + match.group('reason')).split()))
                unexplained.reverse()
                emails = unexplained
                continue
            # Compiled pattern: one reason shared by all remaining addresses.
            match = regexp.search(data)
            if match is not None:
                reason = match.group('reason')
                break
    for addr in emails:
        errors.append(' '.join((addr.strip() + ': ' + reason).split()))
    return errors
153

154
# Parsers tried in order by ErrorMessage.get_errors().  Each is called with
# (fp, sub) and either returns a list of error strings or raises Unparseable.
EMPARSERS = [emparse_list]
155 156

def sort_numeric(a, b):
    """Compare *a* and *b* by their integer values, cmp-style.

    Both arguments are converted with int().  Returns -1 if a < b, 1 if
    a > b and 0 if they are equal.  As a two-argument comparison it must be
    wrapped with functools.cmp_to_key() before being used as a sort key.
    """
    a = int(a)
    b = int(b)
    return (a > b) - (a < b)
165 166 167

def parsedir(dir, modify):
    """Parse every numerically named file in *dir* and print a summary.

    The process changes its working directory to *dir*.  Files whose names
    consist only of digits are handled in numeric order.  For each one a
    status line is printed: 'warning only', '** Not parseable', or the
    number of errors found.  A table of distinct errors follows, each with
    its count and the first and last file (with date) it was seen in.

    Args:
        dir: directory holding the messages.
        modify: if true, each warning-only or successfully parsed file is
            renamed to ',' + its name.  Unparseable files are never renamed.
    """
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    files = [fn for fn in os.listdir('.') if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        # Lets try to parse the file.  Undecodable bytes are replaced so one
        # odd message cannot abort the whole run.
        with open(fn, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
            sender = email.utils.parseaddr(str(m.get('From', '')))
            print('%s\t%-40s\t' % (fn, sender[1]), end=' ')

            if m.is_warning():
                print('warning only')
                nwarn += 1
                errors = None
            else:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    print('** Not parseable')
                    nbad += 1
                    continue
                print(len(errors), 'errors')

        if errors is not None:
            # The date is the same for every error of this message.
            try:
                parsed = email.utils.parsedate(str(m.get('date', '')))
                date = '%s %02d' % (calendar.month_abbr[parsed[1]], parsed[2])
            except (TypeError, ValueError, IndexError):
                # Missing or malformed Date header.
                date = '??????'
            stamp = '%s (%s)' % (fn, date)

            # Remember them
            for e in errors:
                if e not in errordict:
                    errordict[e] = 1
                    errorfirst[e] = stamp
                else:
                    errordict[e] += 1
                errorlast[e] = stamp
            nok += 1

        # The file is closed by now; processed files are renamed, not removed.
        if modify:
            os.rename(fn, ',' + fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    summary = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict)
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
233 234 235 236

def main():
    """Command-line entry point: [-d] [folder ...].

    A leading '-d' turns on modify mode, which is passed on to parsedir().
    Every remaining argument is processed as a folder; when there are none,
    the built-in default folder is used.
    """
    # Work on a copy so sys.argv itself is left untouched.
    args = sys.argv[1:]
    modify = 0
    if args and args[0] == '-d':
        modify = 1
        args = args[1:]
    for folder in args or ['/ufs/jack/Mail/errorsinbox']:
        parsedir(folder, modify)
244 245 246

# Run main() when executed as a script.  The second test also fires when
# sys.argv[0] is equal to this module's __name__.
# NOTE(review): presumably for a launcher that sets argv[0] to the module
# name -- confirm before relying on it.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()