quopri.py 7.13 KB
Newer Older
1
#! /usr/bin/env python3
2

3
"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""
4

5 6
# (Dec 1991 version).

7
# Names exported by "from quopri import *".
__all__ = ["encode", "decode", "encodestring", "decodestring"]

# Byte that introduces an =XX escape; the fallback encoder also ends a
# soft-broken line with it.
ESCAPE = b'='
# Encoded lines longer than this are split with soft line breaks by encode().
MAXLINESIZE = 76
# Digit table indexed by quote() to build the two hex digits of an escape.
HEX = b'0123456789ABCDEF'
# Joiner used by encode() to assemble the pieces of an encoded line.
EMPTYSTRING = b''

# Use the binascii implementations when they can be imported; otherwise the
# names are set to None and the pure-Python code paths below are used.
try:
    from binascii import a2b_qp, b2a_qp
except ImportError:
    a2b_qp = None
    b2a_qp = None
19

20

21
def needsquoting(c, quotetabs, header):
    """Tell whether the bytes object 'c' has to be written as an escape.

    'c' is a bytes object (encode() passes one byte at a time).

    Spaces and tabs are quoted only when 'quotetabs' is set; that flag is
    returned unchanged for them.  (Whitespace at the very end of a line is
    dealt with separately by encode().)

    An underscore is quoted only when 'header' is set, because in header
    mode '_' stands for a space.

    Anything else is quoted if it is the escape byte itself or falls
    outside the range b' ' .. b'~'.
    """
    assert isinstance(c, bytes)
    # Embedded whitespace: the caller's flag decides.
    if c in b' \t':
        return quotetabs
    # if header, we have to escape _ because _ is used to escape space
    if c == b'_':
        return header
    if c == ESCAPE:
        return True
    return c < b' ' or c > b'~'
35 36

def quote(c):
    """Return the escape sequence for the single byte 'c'.

    The result is ESCAPE followed by the two digits, taken from HEX, of
    the byte's value in base 16.
    """
    assert isinstance(c, bytes) and len(c)==1
    high, low = divmod(ord(c), 16)
    return ESCAPE + bytes((HEX[high], HEX[low]))
41

42

43

44
def encode(input, output, quotetabs, header=False):
    """Read 'input', apply quoted-printable encoding, and write to 'output'.

    'input' and 'output' are binary file objects.  When binascii.b2a_qp is
    available, 'input' is consumed with a single read() call and encoded in
    one go; otherwise it is read line by line with readline() and encoded
    by the pure-Python code below.  Results are passed to output.write().

    The 'quotetabs' flag indicates whether embedded tabs and spaces should be
    quoted.  Note that line-ending tabs and spaces are always encoded, as per
    RFC 1521.

    The 'header' flag indicates whether we are encoding spaces as _ as per
    RFC 1522.
    """

    if b2a_qp is not None:
        data = input.read()
        odata = b2a_qp(data, quotetabs=quotetabs, header=header)
        output.write(odata)
        return

    def write(s, output=output, lineEnd=b'\n'):
        # RFC 1521 requires that the line ending in a space or tab must have
        # that trailing character encoded.
        if s and s[-1:] in b' \t':
            output.write(s[:-1] + quote(s[-1:]) + lineEnd)
        elif s == b'.':
            # A line consisting of a lone '.' is written quoted as well.
            output.write(quote(s) + lineEnd)
        else:
            output.write(s + lineEnd)

    # Each encoded line is held back until the next one has been read, so
    # that the final line can be written with its own original ending.
    prevline = None
    while 1:
        line = input.readline()
        if not line:
            break
        outline = []
        # Strip off any readline induced trailing newline
        stripped = b''
        if line[-1:] == b'\n':
            line = line[:-1]
            stripped = b'\n'
        # Calculate the un-length-limited encoded line
        for c in line:
            c = bytes((c,))
            if needsquoting(c, quotetabs, header):
                c = quote(c)
            if header and c == b' ':
                outline.append(b'_')
            else:
                outline.append(c)
        # First, write out the previous line
        if prevline is not None:
            write(prevline)
        # Now see if we need any soft line breaks because of RFC-imposed
        # length limitations.  Then do the thisline->prevline dance.
        thisline = EMPTYSTRING.join(outline)
        while len(thisline) > MAXLINESIZE:
            # Don't forget to include the soft line break `=' sign in the
            # length calculation!
            cut = MAXLINESIZE - 1
            # Never break in the middle of an =XX escape.  A literal '=' is
            # always quoted, so any ESCAPE byte in 'thisline' starts such a
            # triplet; if one begins in the last two positions before the
            # cut, move the cut back so the whole escape goes to the next
            # chunk.
            if thisline[cut-1:cut] == ESCAPE:
                cut -= 1
            elif thisline[cut-2:cut-1] == ESCAPE:
                cut -= 2
            # NOTE: write() still quotes a trailing space/tab of the chunk,
            # which can lengthen the emitted line by two bytes.
            write(thisline[:cut], lineEnd=b'=\n')
            thisline = thisline[cut:]
        # Write out the current line
        prevline = thisline
    # Write out the last line, without a trailing newline
    if prevline is not None:
        write(prevline, lineEnd=stripped)
107

108
def encodestring(s, quotetabs=False, header=False):
    """Return the quoted-printable encoding of the bytes object 's'.

    'quotetabs' and 'header' are forwarded unchanged.  binascii.b2a_qp is
    used when it is available; otherwise 's' is run through encode() using
    in-memory byte streams.
    """
    if b2a_qp is None:
        from io import BytesIO
        source = BytesIO(s)
        sink = BytesIO()
        encode(source, sink, quotetabs, header)
        return sink.getvalue()
    return b2a_qp(s, quotetabs=quotetabs, header=header)


118

119
def decode(input, output, header=False):
    """Read 'input', apply quoted-printable decoding, and write to 'output'.

    'input' and 'output' are binary file objects.  When binascii.a2b_qp is
    available, 'input' is consumed with a single read() call and decoded in
    one go; otherwise it is read line by line with readline() and decoded
    by the pure-Python code below.  Results are passed to output.write().

    If 'header' is true, decode underscore as space (per RFC 1522).
    """

    if a2b_qp is not None:
        data = input.read()
        odata = a2b_qp(data, header=header)
        output.write(odata)
        return

    # Decoded bytes collected so far for the current logical output line;
    # carried over to the next input line after a soft line break.
    new = b''
    while 1:
        line = input.readline()
        if not line:
            break
        i, n = 0, len(line)
        if n > 0 and line[n-1:n] == b'\n':
            partial = 0
            n = n-1
            # Strip trailing whitespace
            while n > 0 and line[n-1:n] in b" \t\r":
                n = n-1
        else:
            # No newline was read: nothing to terminate the output with.
            partial = 1
        while i < n:
            c = line[i:i+1]
            if c == b'_' and header:
                new = new + b' '
                i = i+1
            elif c != ESCAPE:
                new = new + c
                i = i+1
            elif i+1 == n and not partial:
                # '=' as the last character: soft line break, keep going
                # on the next input line without emitting a newline.
                partial = 1
                break
            elif i+1 < n and line[i+1:i+2] == ESCAPE:
                # Doubled escape byte stands for a single one.  A slice is
                # required here: indexing bytes yields an int, which never
                # compares equal to the bytes constant ESCAPE.
                new = new + ESCAPE
                i = i+2
            elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
                new = new + bytes((unhex(line[i+1:i+3]),))
                i = i+3
            else: # Bad escape sequence -- leave it in
                new = new + c
                i = i+1
        if not partial:
            output.write(new + b'\n')
            new = b''
    if new:
        output.write(new)
161

162
def decodestring(s, header=False):
    """Return the bytes obtained by quoted-printable decoding 's'.

    'header' is forwarded unchanged.  binascii.a2b_qp is used when it is
    available; otherwise 's' is run through decode() using in-memory byte
    streams.
    """
    if a2b_qp is None:
        from io import BytesIO
        source = BytesIO(s)
        sink = BytesIO()
        decode(source, sink, header=header)
        return sink.getvalue()
    return a2b_qp(s, header=header)


172

173
# Other helper functions
174
def ishex(c):
    """Return true if the bytes object 'c' is an ASCII hexadecimal digit.

    'c' must be a bytes object (decode() passes one-byte slices); digits
    0-9 and the letters a-f in either case are accepted.
    """
    assert isinstance(c, bytes)
    for first, last in ((b'0', b'9'), (b'a', b'f'), (b'A', b'F')):
        if first <= c <= last:
            return True
    return False
178 179

def unhex(s):
    """Return the integer spelled by the ASCII hexadecimal digits in 's'.

    's' is iterated byte by byte, most significant digit first; both
    lower- and upper-case letters are accepted.  An empty 's' gives 0.
    A byte that is not a hex digit trips an assertion.
    """
    value = 0
    for code in s:
        digit = bytes((code,))
        # Pick the amount to subtract from the byte value to get 0..15.
        if b'0' <= digit <= b'9':
            offset = ord('0')
        elif b'a' <= digit <= b'f':
            offset = ord('a')-10
        elif b'A' <= digit <= b'F':
            offset = ord(b'A')-10
        else:
            assert False, "non-hex digit "+repr(digit)
        value = value*16 + (ord(digit) - offset)
    return value
194

195

196

197
def main():
    """Command-line entry point: encode or decode files to standard output.

    Options are taken from sys.argv:
      -t  pass a true 'quotetabs' flag to encode()
      -d  decode instead of encode (the two options are mutually exclusive)

    Each remaining argument is a file to process; '-' (also the default
    when no file is given) means standard input.  Data is read and written
    in binary.  Usage errors exit with status 2; if any file cannot be
    opened, a message is written to stderr, the remaining files are still
    processed, and the exit status is 1.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'td')
    except getopt.error as msg:
        # Write diagnostics straight to stderr instead of rebinding
        # sys.stdout, which would leak to callers that catch SystemExit.
        print(msg, file=sys.stderr)
        print("usage: quopri [-t | -d] [file] ...", file=sys.stderr)
        print("-t: quote tabs", file=sys.stderr)
        print("-d: decode; default encode", file=sys.stderr)
        sys.exit(2)
    deco = False
    tabs = False
    for o, _ in opts:
        if o == '-t':
            tabs = True
        if o == '-d':
            deco = True
    if tabs and deco:
        print("-t and -d are mutually exclusive", file=sys.stderr)
        sys.exit(2)
    if not args:
        args = ['-']
    sts = 0
    for file in args:
        if file == '-':
            fp = sys.stdin.buffer
        else:
            try:
                fp = open(file, "rb")
            except OSError as msg:
                sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
                sts = 1
                continue
        try:
            if deco:
                decode(fp, sys.stdout.buffer)
            else:
                encode(fp, sys.stdout.buffer, tabs)
        finally:
            # Only close what was opened here; stdin is left alone.
            if file != '-':
                fp.close()
    if sts:
        sys.exit(sts)
240

241

242

243
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()