"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

# Names exported by "from sndhdr import *".
__all__ = ['what', 'whathdr']

from collections import namedtuple

# Result record returned by whathdr(); the field order matches the tuples
# produced by the per-format test functions.
SndHeaders = namedtuple('SndHeaders',
                        'filetype framerate nchannels nframes sampwidth')

# Per-field documentation, attached after the fact so that help() on the
# namedtuple describes each item.
SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type
and will be one of the strings 'aifc', 'aiff', 'au','hcom',
'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""")
SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual
value or 0 if unknown or difficult to decode.""")
SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be
determined or if the value is difficult to decode.""")
SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number
of frames or -1.""")
SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or
'A' for A-LAW or 'U' for u-LAW.""")


def what(filename):
    """Guess the type of a sound file.

    This simply delegates to whathdr(filename) and returns its result
    unchanged: a SndHeaders tuple when a header is recognized, otherwise
    None.
    """
    return whathdr(filename)


def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads its first 512 bytes and offers
    them (together with the open file object) to every function in the
    module-level ``tests`` list, in order.

    Returns a SndHeaders built from the first truthy result, or None when
    no test recognizes the file.  Errors from open() are not caught here.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            # Each test gets the header bytes plus the file object, so a
            # test that needs more than the header can seek and re-read.
            res = tf(h, f)
            if res:
                return SndHeaders(*res)
        return None


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Registry of header-detection functions.  whathdr() calls each entry as
# test(h, f) -- h being the leading bytes of the file and f the open file
# object -- in list order, and stops at the first truthy result, so the
# order of registration decides which test wins when several could match.
tests = []

def test_aifc(h, f):
Tim Peters's avatar
Tim Peters committed
76
    import aifc
77
    if not h.startswith(b'FORM'):
Tim Peters's avatar
Tim Peters committed
78
        return None
79
    if h[8:12] == b'AIFC':
Tim Peters's avatar
Tim Peters committed
80
        fmt = 'aifc'
81
    elif h[8:12] == b'AIFF':
82
        fmt = 'aiff'
Tim Peters's avatar
Tim Peters committed
83 84 85 86
    else:
        return None
    f.seek(0)
    try:
87
        a = aifc.open(f, 'r')
Tim Peters's avatar
Tim Peters committed
88 89
    except (EOFError, aifc.Error):
        return None
90 91
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())
92 93 94 95 96

tests.append(test_aifc)


def test_au(h, f):
97
    if h.startswith(b'.snd'):
98 99 100
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
Tim Peters's avatar
Tim Peters committed
101 102
    else:
        return None
103 104 105 106 107 108
    filetype = 'au'
    hdr_size = func(h[4:8])
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
Tim Peters's avatar
Tim Peters committed
109 110 111 112 113 114 115 116 117 118 119
    sample_size = 1 # default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
120 121 122 123 124
    if frame_size:
        nframe = data_size / frame_size
    else:
        nframe = -1
    return filetype, rate, nchannels, nframe, sample_bits
125 126 127 128 129

tests.append(test_au)


def test_hcom(h, f):
130
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
Tim Peters's avatar
Tim Peters committed
131
        return None
132 133 134 135 136 137
    divisor = get_long_be(h[144:148])
    if divisor:
        rate = 22050 / divisor
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8
138 139 140 141 142

tests.append(test_hcom)


def test_voc(h, f):
143
    if not h.startswith(b'Creative Voice File\032'):
Tim Peters's avatar
Tim Peters committed
144 145 146
        return None
    sbseek = get_short_le(h[20:22])
    rate = 0
147 148 149 150
    if 0 <= sbseek < 500 and h[sbseek] == 1:
        ratecode = 256 - h[sbseek+4]
        if ratecode:
            rate = int(1000000.0 / ratecode)
Tim Peters's avatar
Tim Peters committed
151
    return 'voc', rate, 1, -1, 8
152 153 154 155 156

tests.append(test_voc)


def test_wav(h, f):
157
    import wave
Tim Peters's avatar
Tim Peters committed
158
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
159
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters's avatar
Tim Peters committed
160
        return None
161 162 163 164 165 166 167
    f.seek(0)
    try:
        w = wave.openfp(f, 'r')
    except (EOFError, wave.Error):
        return None
    return ('wav', w.getframerate(), w.getnchannels(),
                   w.getnframes(), 8*w.getsampwidth())
168 169 170 171 172

tests.append(test_wav)


def test_8svx(h, f):
173
    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters's avatar
Tim Peters committed
174 175 176
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8
177 178 179 180 181

tests.append(test_8svx)


def test_sndt(h, f):
182
    if h.startswith(b'SOUND'):
Tim Peters's avatar
Tim Peters committed
183 184 185
        nsamples = get_long_le(h[8:12])
        rate = get_short_le(h[20:22])
        return 'sndt', rate, 1, nsamples, 8
186 187 188 189 190

tests.append(test_sndt)


def test_sndr(h, f):
191
    if h.startswith(b'\0\0'):
Tim Peters's avatar
Tim Peters committed
192 193 194
        rate = get_short_le(h[2:4])
        if 4000 <= rate <= 25000:
            return 'sndr', rate, 1, -1, 8
195 196 197 198

tests.append(test_sndr)


#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#

def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    value = b[0]
    for index in (1, 2, 3):
        value = (value << 8) | b[index]
    return value
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    value = b[3]
    for index in (2, 1, 0):
        value = (value << 8) | b[index]
    return value
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, high byte first."""
    high, low = b[0], b[1]
    return (high << 8) | low
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, low byte first."""
    high, low = b[1], b[0]
    return (high << 8) | low


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run testall() over the arguments.

    A leading '-r' argument enables recursion into subdirectories.  The
    remaining arguments are the files/directories to examine; with none
    given, '.' is used.  On KeyboardInterrupt a notice is written to
    stderr and the process exits with status 1.
    """
    import sys
    # Work on a copy so the process-wide sys.argv is left untouched.
    args = sys.argv[1:]
    recursive = 0
    if args and args[0] == '-r':
        del args[0]
        recursive = 1
    try:
        testall(args or ['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
    """Print the result of what() for every name in *list*.

    list      -- iterable of file or directory names (the parameter name
                 shadows the builtin; it is kept for compatibility)
    recursive -- truthy to descend into directories at any depth
    toplevel  -- truthy for the names given by the user; directories at
                 this level are always expanded one level

    Directories are expanded with a '*' glob and handled by a recursive
    call with toplevel false.  Files that raise OSError are reported as
    '*** not found ***'.
    """
    import sys
    import os
    import glob
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            # Show the name before what() runs, in case it is slow or fails.
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')

# Run the small test program when the module is executed as a script.
if __name__ == '__main__':
    test()