sndhdr.py 6.07 KB
Newer Older
Guido van Rossum's avatar
Guido van Rossum committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
"""Routines to help recognizing sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError (IOError is an alias for it) is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

30 31 32
# The file structure is top-down except that the test program and its
# subroutine come last.

33
__all__ = ['what', 'whathdr']
34 35

def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(): returns whatever whathdr(filename)
    returns (a result tuple, or None when no header is recognized).  Any
    exception raised while opening or reading the file propagates.
    """
    return whathdr(filename)
39 40 41


def whathdr(filename):
    """Recognize sound headers.

    Open *filename* in binary mode, read its first 512 bytes and offer them,
    together with the open file object, to each detector in ``tests`` in
    list order.

    Returns the first truthy detector result, or None if no detector
    matches.  Errors raised by open() propagate to the caller.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    return None
50 51 52 53 54 55 56 57 58


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Detector functions, in the order whathdr() tries them.  Each one is called
# as tf(h, f) with the first 512 bytes of the file and the open file object.
tests = []

def test_aifc(h, f):
Tim Peters's avatar
Tim Peters committed
59
    import aifc
60
    if not h.startswith(b'FORM'):
Tim Peters's avatar
Tim Peters committed
61
        return None
62
    if h[8:12] == b'AIFC':
Tim Peters's avatar
Tim Peters committed
63
        fmt = 'aifc'
64
    elif h[8:12] == b'AIFF':
65
        fmt = 'aiff'
Tim Peters's avatar
Tim Peters committed
66 67 68 69
    else:
        return None
    f.seek(0)
    try:
70
        a = aifc.open(f, 'r')
Tim Peters's avatar
Tim Peters committed
71 72
    except (EOFError, aifc.Error):
        return None
73 74
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())
75 76 77 78 79

tests.append(test_aifc)


def test_au(h, f):
    """Recognize Sun/NeXT style audio headers.

    The magic b'.snd' selects big-endian field decoding; b'\\0ds.' and
    b'dns.' select little-endian decoding.  The 32-bit fields read are the
    data size (bytes 8-11), encoding (12-15), sampling rate (16-19) and
    channel count (20-23).  f is unused.

    Returns ('au', rate, nchannels, nframes, sample_bits), where
    sample_bits is 'U' for encoding 1, 8 for encoding 2, 16 for encoding 3
    and '?' otherwise.  nframes is -1 when it cannot be computed (zero
    channels, or a data size of 0xFFFFFFFF meaning "unknown").  Returns
    None if the magic does not match or the header is shorter than 24 bytes.
    """
    magic = h[:4]
    if magic == b'.snd':
        func = get_long_be
    elif magic in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    if len(h) < 24:
        # Magic matched but the fixed header fields are truncated.
        return None
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    sample_size = 1  # bytes per sample; default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    if frame_size and data_size != 0xFFFFFFFF:
        # Floor division: a frame count is an integer, and '/' would
        # produce a float under Python 3.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return 'au', rate, nchannels, nframe, sample_bits
108 109 110 111 112

tests.append(test_au)


def test_hcom(h, f):
    """Recognize HCOM files.

    Matches b'FSSD' at bytes 65-68 and b'HCOM' at bytes 128-131.  The
    sampling rate is 22050 divided by the big-endian 32-bit divisor stored
    at bytes 144-147.  f is unused.

    Returns ('hcom', rate, 1, -1, 8); rate is 0 when the divisor is zero or
    the header is too short to contain it.  Returns None if the magic
    strings do not match.
    """
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    rate = 0
    if len(h) >= 148:
        divisor = get_long_be(h[144:148])
        if divisor:
            # Floor division keeps the rate an integer under Python 3.
            rate = 22050 // divisor
    return 'hcom', rate, 1, -1, 8
121 122 123 124 125

tests.append(test_hcom)


def test_voc(h, f):
    """Recognize Creative Voice files.

    After the b'Creative Voice File\\x1a' magic, bytes 20-21 hold a
    little-endian 16-bit offset.  If the byte at that offset is 1, the byte
    four positions later is used as a rate code and the sampling rate is
    computed as 1000000 / (256 - code).  f is unused.

    Returns ('voc', rate, 1, -1, 8); rate is 0 when it cannot be read from
    the bytes available in h.  Returns None if the magic does not match.
    """
    if not h.startswith(b'Creative Voice File\032'):
        return None
    rate = 0
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # Only look at the block when both bytes we need lie inside h.
        if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
            ratecode = 256 - h[sbseek+4]
            if ratecode:
                rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8
135 136 137 138 139

tests.append(test_voc)


def test_wav(h, f):
    """Recognize RIFF/WAVE files whose first chunk is 'fmt '.

    Reads, little-endian, the channel count (bytes 22-23), the sampling
    rate (bytes 24-27) and the bits per sample (bytes 34-35).  f is unused.

    Returns ('wav', rate, nchannels, -1, sample_bits), or None if the
    header does not match or is shorter than 36 bytes.
    """
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    if len(h) < 36:
        # Truncated header: the fields below would be out of range.
        return None
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits
148 149 150 151 152

tests.append(test_wav)


def test_8svx(h, f):
153
    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters's avatar
Tim Peters committed
154 155 156
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8
157 158 159 160 161

tests.append(test_8svx)


def test_sndt(h, f):
    """Recognize headers that start with b'SOUND'.

    Reads, little-endian, a 32-bit sample count from bytes 8-11 and a
    16-bit sampling rate from bytes 20-21.  f is unused.

    Returns ('sndt', rate, 1, nsamples, 8), or None if the magic does not
    match or the header is shorter than 22 bytes.
    """
    if not h.startswith(b'SOUND') or len(h) < 22:
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8
166 167 168 169 170

tests.append(test_sndt)


def test_sndr(h, f):
    """Recognize headers that start with two NUL bytes followed by a rate.

    Bytes 2-3 are read as a little-endian 16-bit sampling rate; the header
    is accepted only when that rate lies in 4000..25000.  f is unused.

    Returns ('sndr', rate, 1, -1, 8), or None if the header does not match
    or is shorter than 4 bytes.
    """
    if h.startswith(b'\0\0') and len(h) >= 4:
        rate = get_short_le(h[2:4])
        if 4000 <= rate <= 25000:
            return 'sndr', rate, 1, -1, 8
    return None
175 176 177 178

tests.append(test_sndr)


179 180 181
#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#
182

183 184
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant item first."""
    first, second, third, fourth = b[0], b[1], b[2], b[3]
    return (((first << 8 | second) << 8 | third) << 8) | fourth
185

186 187
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant item first."""
    top, upper, lower, bottom = b[3], b[2], b[1], b[0]
    return bottom | (lower << 8) | (upper << 16) | (top << 24)
188

189 190
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, most significant item first."""
    high, low = b[0], b[1]
    return low | (high << 8)
191

192 193
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, least significant item first."""
    high, low = b[1], b[0]
    return low | (high << 8)
194 195 196 197 198 199 200


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: print what() for every named file.

    Arguments are taken from sys.argv[1:].  A leading '-r' turns on
    recursion into sub-directories; with no file arguments the current
    directory ('.') is examined.  On KeyboardInterrupt a notice is written
    to stderr and the process exits with status 1.
    """
    import sys
    # Work on a copy so the process-wide sys.argv is left untouched.
    args = sys.argv[1:]
    recursive = False
    if args and args[0] == '-r':
        del args[0]
        recursive = True
    if not args:
        args = ['.']
    try:
        testall(args, recursive, True)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
214 215

def testall(list, recursive, toplevel):
    """Print the what() result for each name in *list*.

    list      -- iterable of file or directory names
    recursive -- true to descend into directories at every level
    toplevel  -- true for the explicitly given names; directories named at
                 top level are always listed one level deep

    Directories that are not descended into are reported with a hint to
    use -r.  Files that raise IOError in what() are reported as not found.
    """
    import sys
    import os
    import glob
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                # Escape the directory part so glob metacharacters in its
                # name ('[', '*', '?') are matched literally.
                names = glob.glob(os.path.join(glob.escape(filename), '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            sys.stdout.flush()
            try:
                print(what(filename))
            except IOError:
                print('*** not found ***')
Guido van Rossum's avatar
Guido van Rossum committed
235 236

# Run the small test program when executed as a script.
if __name__ == '__main__':
    test()