#! /usr/bin/env python

"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""

# (Dec 1991 version).

__all__ = ["encode", "decode", "encodestring", "decodestring"]

# Character that introduces an '=XX' escape; decode() also treats it as a
# soft line break when it is the last character before a newline.
ESCAPE = '='
# Maximum length of an encoded line, counting the soft line break '='.
MAXLINESIZE = 76
# Hex digits used when building an escape, indexed by nibble value.
HEX = '0123456789ABCDEF'
EMPTYSTRING = ''

# Use the binascii implementations when they can be imported.  When they
# cannot, both names are None and the pure-Python code in this module runs.
try:
    from binascii import a2b_qp, b2a_qp
except ImportError:
    a2b_qp = None
    b2a_qp = None


def needsquoting(c, quotetabs, header):
    """Decide whether a particular character needs to be quoted.

    The 'quotetabs' flag indicates whether embedded tabs and spaces should be
    quoted.  Note that line-ending tabs and spaces are always encoded, as per
    RFC 1521.

    The 'header' flag indicates whether an underscore needs quoting; in
    header mode '_' stands for a space, so a literal '_' has to be escaped.

    Returns a true value when 'c' must be written as an escape sequence.
    """
    if c in ' \t':
        return quotetabs
    # if header, we have to escape _ because _ is used to escape space
    if c == '_':
        return header
    # The escape character itself, and anything outside ' '..'~', is quoted.
    return c == ESCAPE or not (' ' <= c <= '~')

def quote(c):
    """Quote a single character.

    Returns ESCAPE followed by the two uppercase hexadecimal digits of
    ord(c), high nibble first.
    """
    i = ord(c)
    return ESCAPE + HEX[i//16] + HEX[i%16]


def encode(input, output, quotetabs, header = 0):
    """Read 'input', apply quoted-printable encoding, and write to 'output'.

    'input' and 'output' are files with readline() and write() methods.
    (When binascii.b2a_qp is available, 'input' is instead consumed with a
    single read() call and the result written with a single write().)
    The 'quotetabs' flag indicates whether embedded tabs and spaces should be
    quoted.  Note that line-ending tabs and spaces are always encoded, as per
    RFC 1521.
    The 'header' flag indicates whether we are encoding spaces as _ as per
    RFC 1522.
    """

    if b2a_qp is not None:
        data = input.read()
        odata = b2a_qp(data, quotetabs = quotetabs, header = header)
        output.write(odata)
        return

    def write(s, output=output, lineEnd='\n'):
        """Write one encoded line 's' followed by 'lineEnd'."""
        # RFC 1521 requires that the line ending in a space or tab must have
        # that trailing character encoded.
        if s and s[-1:] in ' \t':
            output.write(s[:-1] + quote(s[-1]) + lineEnd)
        elif s == '.':
            # A line consisting of a single '.' is written quoted.
            output.write(quote(s) + lineEnd)
        else:
            output.write(s + lineEnd)

    prevline = None
    # Line terminator removed from the most recently read line ('' or '\n');
    # it is re-applied to the very last line written.
    stripped = ''
    while 1:
        line = input.readline()
        if not line:
            break
        outline = []
        # Strip off any readline induced trailing newline
        stripped = ''
        if line[-1:] == '\n':
            line = line[:-1]
            stripped = '\n'
        # Calculate the un-length-limited encoded line
        for c in line:
            if needsquoting(c, quotetabs, header):
                c = quote(c)
            if header and c == ' ':
                outline.append('_')
            else:
                outline.append(c)
        # First, write out the previous line
        if prevline is not None:
            write(prevline)
        # Now see if we need any soft line breaks because of RFC-imposed
        # length limitations.  Then do the thisline->prevline dance.
        thisline = EMPTYSTRING.join(outline)
        while len(thisline) > MAXLINESIZE:
            # Don't forget to include the soft line break `=' sign in the
            # length calculation!
            cut = MAXLINESIZE - 1
            # Every ESCAPE in the encoded text starts a three-character
            # escape (a literal '=' is itself always quoted), so move the
            # cut back if it would fall inside one; a split escape cannot
            # be put back together by the decoder.
            if thisline[cut-1] == ESCAPE:
                cut = cut - 1
            elif thisline[cut-2] == ESCAPE:
                cut = cut - 2
            write(thisline[:cut], lineEnd='=\n')
            thisline = thisline[cut:]
        # Write out the current line
        prevline = thisline
    # Write out the last line, without a trailing newline
    if prevline is not None:
        write(prevline, lineEnd=stripped)


def encodestring(s, quotetabs = 0, header = 0):
    """Return the quoted-printable encoding of the string 's'.

    'quotetabs' and 'header' have the same meaning as for encode().  Uses
    binascii.b2a_qp when available; otherwise runs encode() over in-memory
    file objects.
    """
    if b2a_qp is not None:
        return b2a_qp(s, quotetabs = quotetabs, header = header)
    from cStringIO import StringIO
    infp = StringIO(s)
    outfp = StringIO()
    encode(infp, outfp, quotetabs, header)
    return outfp.getvalue()


def decode(input, output, header = 0):
    """Read 'input', apply quoted-printable decoding, and write to 'output'.

    'input' and 'output' are files with readline() and write() methods.
    (When binascii.a2b_qp is available, 'input' is instead consumed with a
    single read() call and the result written with a single write().)
    If 'header' is true, decode underscore as space (per RFC 1522).
    """

    if a2b_qp is not None:
        data = input.read()
        odata = a2b_qp(data, header = header)
        output.write(odata)
        return

    # Decoded pieces of the output line being assembled.  They are kept
    # across input lines while a soft line break is pending, and joined
    # once per output line instead of growing a string one char at a time.
    pieces = []
    while 1:
        line = input.readline()
        if not line:
            break
        i, n = 0, len(line)
        if n > 0 and line[n-1] == '\n':
            partial = 0
            n = n-1
            # Strip trailing whitespace
            while n > 0 and line[n-1] in " \t\r":
                n = n-1
        else:
            # No newline at the end: nothing is flushed for this line
            # inside the loop; it is written after the loop ends.
            partial = 1
        while i < n:
            c = line[i]
            if c == '_' and header:
                pieces.append(' ')
                i = i+1
            elif c != ESCAPE:
                pieces.append(c)
                i = i+1
            elif i+1 == n and not partial:
                # ESCAPE as the last character of a complete line is a
                # soft line break: keep accumulating into the same output.
                partial = 1
                break
            elif i+1 < n and line[i+1] == ESCAPE:
                # Doubled escape character decodes to a single one.
                pieces.append(ESCAPE)
                i = i+2
            elif i+2 < n and ishex(line[i+1]) and ishex(line[i+2]):
                pieces.append(chr(unhex(line[i+1:i+3])))
                i = i+3
            else: # Bad escape sequence -- leave it in
                pieces.append(c)
                i = i+1
        if not partial:
            output.write(''.join(pieces) + '\n')
            pieces = []
    if pieces:
        output.write(''.join(pieces))


def decodestring(s, header = 0):
    """Return the decoded form of the quoted-printable string 's'.

    'header' has the same meaning as for decode().  Uses binascii.a2b_qp
    when available; otherwise runs decode() over in-memory file objects.
    """
    if a2b_qp is not None:
        return a2b_qp(s, header = header)
    from cStringIO import StringIO
    infp = StringIO(s)
    outfp = StringIO()
    decode(infp, outfp, header = header)
    return outfp.getvalue()


# Other helper functions
def ishex(c):
    """Return true if the character 'c' is a hexadecimal digit.

    Both lowercase and uppercase letters a-f are accepted.
    """
    return '0' <= c <= '9' or 'a' <= c <= 'f' or 'A' <= c <= 'F'

def unhex(s):
    """Get the integer value of a hexadecimal number.

    Digits are read left to right.  Parsing stops at the first character
    that is not a hexadecimal digit, and the value accumulated up to that
    point is returned (0 if there were no leading hex digits).
    """
    bits = 0
    for c in s:
        # 'i' is the offset that maps ord(c) onto the digit's value.
        if '0' <= c <= '9':
            i = ord('0')
        elif 'a' <= c <= 'f':
            i = ord('a')-10
        elif 'A' <= c <= 'F':
            i = ord('A')-10
        else:
            break
        bits = bits*16 + (ord(c) - i)
    return bits


def main():
    """Command-line entry point: quopri [-t | -d] [file] ...

    Encodes (default) or decodes (-d) each named file to standard output;
    '-' or no file argument means standard input.  -t asks for tabs to be
    quoted when encoding and cannot be combined with -d.

    Exits with status 2 on a usage error and status 1 if any input file
    could not be opened.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'td')
    except getopt.error:
        # Fetch the exception via sys.exc_info() so no version-specific
        # 'except' syntax is needed.  Diagnostics go straight to stderr
        # rather than rebinding sys.stdout, which would stay redirected
        # if a caller caught the SystemExit.
        msg = sys.exc_info()[1]
        sys.stderr.write("%s\n" % (msg,))
        sys.stderr.write("usage: quopri [-t | -d] [file] ...\n")
        sys.stderr.write("-t: quote tabs\n")
        sys.stderr.write("-d: decode; default encode\n")
        sys.exit(2)
    deco = 0
    tabs = 0
    for o, a in opts:
        if o == '-t': tabs = 1
        if o == '-d': deco = 1
    if tabs and deco:
        sys.stderr.write("-t and -d are mutually exclusive\n")
        sys.exit(2)
    if not args: args = ['-']
    sts = 0
    for path in args:
        if path == '-':
            fp = sys.stdin
        else:
            try:
                fp = open(path)
            except IOError:
                msg = sys.exc_info()[1]
                sys.stderr.write("%s: can't open (%s)\n" % (path, msg))
                sts = 1
                continue
        try:
            if deco:
                decode(fp, sys.stdout)
            else:
                encode(fp, sys.stdout, tabs)
        finally:
            # Close files we opened even if encoding/decoding fails.
            if fp is not sys.stdin:
                fp.close()
    if sts:
        sys.exit(sts)


# Run the command-line interface when executed as a script.
if __name__ == '__main__':
    main()