#!/usr/bin/env python
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original.  This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.

import zlib, sys, os
9 10 11 12

# Bit masks for the flag byte of the gzip header: compress() writes FNAME
# as that byte, and decompress() tests the byte against these masks to
# decide which optional header fields to skip.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

def write32(output, value):
    """Write the low 32 bits of *value* to *output*, low byte first.

    output -- binary file-like object with a write() method.
    value  -- integer; bits above the low 32 are discarded, and a
              negative value is written in two's-complement form.
    """
    # chr() yields a text string, which a binary stream rejects on
    # Python 3.  Going through bytearray produces a real byte string on
    # both Python 2 and Python 3.
    octets = bytearray((value >> shift) & 255 for shift in (0, 8, 16, 24))
    output.write(bytes(octets))
17

18
def read32(input):
    """Read four bytes from *input* and return them as one integer.

    The first byte read is the least significant.  *input* is a binary
    file-like object; it is read one byte at a time.
    """
    result = 0
    for shift in (0, 8, 16, 24):
        # Each byte lands in its own 8-bit slot, so OR-ing is the same
        # as adding.
        result |= ord(input.read(1)) << shift
    return result

25
def compress(filename, input, output):
    """Write the contents of *input* to *output* as a gzip-format stream.

    filename -- path of the file being compressed.  It is passed to
                os.stat() to obtain the modification time and is stored
                in the header as the original file name.
    input    -- binary file-like object supplying the uncompressed data.
    output   -- binary file-like object receiving the gzip stream.
    """
    # Everything written to the binary output stream must be bytes; text
    # strings are rejected on Python 3.
    output.write(b'\037\213\010')       # Write the header, ...
    output.write(bytes(bytearray([FNAME])))  # ... flag byte ...

    statval = os.stat(filename)         # ... modification time ...
    mtime = statval[8]
    write32(output, mtime)
    output.write(b'\002')               # ... slowest compression alg. ...
    output.write(b'\377')               # ... OS (=unknown) ...
    if isinstance(filename, bytes):
        name_bytes = filename
    else:
        name_bytes = os.fsencode(filename)
    output.write(name_bytes + b'\000')  # ... original filename ...

    crcval = zlib.crc32(b'')
    # Negative wbits selects a raw deflate stream: the gzip header and
    # trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    length = 0
    while True:
        data = input.read(1024)
        # read() returns an empty bytes object at end of file; comparing
        # it with the text string "" never matches on Python 3.
        if not data:
            break
        length += len(data)
        crcval = zlib.crc32(data, crcval)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)             # ... the CRC ...
    # Count the bytes actually compressed rather than trusting st_size,
    # so the trailer always matches the data that was written.
    write32(output, length)             # and the uncompressed size.
48

49 50 51
def _skip_zero_terminated(input):
    """Read and discard bytes from *input* up to and including a NUL byte."""
    while True:
        s = input.read(1)
        # Also stop at end of file (empty read) so that a truncated
        # header cannot loop forever.
        if not s or s == b'\000':
            break


def decompress(input, output):
    """Decompress the gzip stream in *input*, writing the data to *output*.

    input  -- seekable binary file-like object positioned at the start
              of the gzip stream.
    output -- binary file-like object receiving the decompressed data.

    Prints a message and calls sys.exit(0) when the magic number or the
    compression method is not recognised.  Prints a warning, without
    exiting, when the stored CRC or length does not match the data
    produced.
    """
    # Data read from a binary stream is bytes, so every comparison below
    # uses bytes literals; text literals never match on Python 3.
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(0)
    if input.read(1) != b'\010':
        print('Unknown compression method')
        sys.exit(0)
    flag = ord(input.read(1))
    input.read(4+1+1)                   # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present
        xlen = ord(input.read(1))
        xlen += 256*ord(input.read(1))
        input.read(xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_zero_terminated(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_zero_terminated(input)
    if flag & FHCRC:
        input.read(2)                   # Read & discard the 16-bit header CRC

    # Negative wbits: the payload is a raw deflate stream, since the gzip
    # header has already been consumed by hand above.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:                    # empty read means end of file
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    stored_crc = read32(input)
    stored_size = read32(input)
    # read32() always yields an unsigned 32-bit value, so reduce the
    # computed values to the same range: zlib.crc32() can be negative on
    # Python 2, and the stored size is only the low 32 bits of the length.
    if stored_crc != crcval & 0xFFFFFFFF:
        print('CRC check failed.')
    if stored_size != length & 0xFFFFFFFF:
        print('Incorrect length of data produced')
106 107 108

def main():
    """Compress or decompress the single file named on the command line.

    A name ending in '.gz' is decompressed into the same name without
    that suffix; any other name is compressed into '<name>.gz'.  The
    original file is left in place.  With the wrong number of arguments
    a usage message is printed and sys.exit(0) is called.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print('  The file will be compressed or decompressed.')
        sys.exit(0)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with statements close both files even when compression or
    # decompression raises (including the SystemExit raised by
    # decompress() on an unrecognised file).
    with open(filename, 'rb') as input:
        with open(outputname, 'wb') as output:
            if compressing:
                compress(filename, input, output)
            else:
                decompress(input, output)
131

132 133
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()