"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

# Public API of this module: only the two recognition entry points.
__all__ = ["what", "whathdr"]


def what(filename):
    """Guess the type of a sound file.

    Delegates to whathdr() and returns its result unchanged: the
    (type, rate, channels, frames, bits) tuple described in the module
    docstring, or None when no header test recognizes the file.
    """
    return whathdr(filename)


def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and offers them, together
    with the open file object, to each function in the module-level
    ``tests`` list, in order.  Returns the first true result, or None if
    no test recognizes the header.

    An error raised by open() (e.g. IOError) propagates to the caller.
    """
    # Sound files are binary data; text mode would mangle bytes on
    # platforms that translate line endings.
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Always release the file handle, whichever way we leave.
        f.close()


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Registry of header-recognition functions.  whathdr() calls each entry,
# in list order, as tf(h, f) -- h being the first 512 bytes read from the
# file and f the open file object -- and returns the first true result.
# Each test_* function below is appended right after its definition.
tests = []

def test_aifc(h, f):
Tim Peters's avatar
Tim Peters committed
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
    import aifc
    if h[:4] != 'FORM':
        return None
    if h[8:12] == 'AIFC':
        fmt = 'aifc'
    elif h[8:12] == 'AIFF':
        fmt = 'aiff'
    else:
        return None
    f.seek(0)
    try:
        a = aifc.openfp(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(), \
            a.getnframes(), 8*a.getsampwidth())
75 76 77 78 79

tests.append(test_aifc)


def test_au(h, f):
Tim Peters's avatar
Tim Peters committed
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
    if h[:4] == '.snd':
        f = get_long_be
    elif h[:4] in ('\0ds.', 'dns.'):
        f = get_long_le
    else:
        return None
    type = 'au'
    hdr_size = f(h[4:8])
    data_size = f(h[8:12])
    encoding = f(h[12:16])
    rate = f(h[16:20])
    nchannels = f(h[20:24])
    sample_size = 1 # default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    return type, rate, nchannels, data_size/frame_size, sample_bits
104 105 106 107 108

tests.append(test_au)


def test_hcom(h, f):
Tim Peters's avatar
Tim Peters committed
109 110 111 112
    if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
        return None
    divisor = get_long_be(h[128+16:128+20])
    return 'hcom', 22050/divisor, 1, -1, 8
113 114 115 116 117

tests.append(test_hcom)


def test_voc(h, f):
Tim Peters's avatar
Tim Peters committed
118 119 120 121 122 123 124 125
    if h[:20] != 'Creative Voice File\032':
        return None
    sbseek = get_short_le(h[20:22])
    rate = 0
    if 0 <= sbseek < 500 and h[sbseek] == '\1':
        ratecode = ord(h[sbseek+4])
        rate = int(1000000.0 / (256 - ratecode))
    return 'voc', rate, 1, -1, 8
126 127 128 129 130

tests.append(test_voc)


def test_wav(h, f):
Tim Peters's avatar
Tim Peters committed
131 132 133 134 135 136 137 138
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
        return None
    style = get_short_le(h[20:22])
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits
139 140 141 142 143

tests.append(test_wav)


def test_8svx(h, f):
Tim Peters's avatar
Tim Peters committed
144 145 146 147
    if h[:4] != 'FORM' or h[8:12] != '8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8
148 149 150 151 152

tests.append(test_8svx)


def test_sndt(h, f):
Tim Peters's avatar
Tim Peters committed
153 154 155 156
    if h[:5] == 'SOUND':
        nsamples = get_long_le(h[8:12])
        rate = get_short_le(h[20:22])
        return 'sndt', rate, 1, nsamples, 8
157 158 159 160 161

tests.append(test_sndt)


def test_sndr(h, f):
Tim Peters's avatar
Tim Peters committed
162 163 164 165
    if h[:2] == '\0\0':
        rate = get_short_le(h[2:4])
        if 4000 <= rate <= 25000:
            return 'sndr', rate, 1, -1, 8
166 167 168 169 170 171 172 173 174

tests.append(test_sndr)


#---------------------------------------------#
# Subroutines to extract numbers from strings #
#---------------------------------------------#

def get_long_be(s):
    """Decode the first four characters of s as a big-endian integer.

    s[0] is the most significant byte.  Raises IndexError if s has fewer
    than four characters.
    """
    return (ord(s[0]) << 24) | (ord(s[1]) << 16) | (ord(s[2]) << 8) | ord(s[3])

def get_long_le(s):
    """Decode the first four characters of s as a little-endian integer.

    s[0] is the least significant byte.  Raises IndexError if s has
    fewer than four characters.
    """
    return (ord(s[3]) << 24) | (ord(s[2]) << 16) | (ord(s[1]) << 8) | ord(s[0])

def get_short_be(s):
    """Decode the first two characters of s as a big-endian integer.

    s[0] is the most significant byte.  Raises IndexError if s has fewer
    than two characters.
    """
    return (ord(s[0]) << 8) | ord(s[1])

def get_short_le(s):
    """Decode the first two characters of s as a little-endian integer.

    s[0] is the least significant byte.  Raises IndexError if s has
    fewer than two characters.
    """
    return (ord(s[1]) << 8) | ord(s[0])


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: report the sound type of each argument.

    Arguments are taken from sys.argv.  A leading '-r' enables recursion
    into subdirectories of the given directories; with no file or
    directory arguments the current directory ('.') is examined.

    On KeyboardInterrupt a short notice is written to stderr and the
    process exits with status 1.
    """
    import sys
    # Work on a copy so the process-wide sys.argv is left untouched.
    args = sys.argv[1:]
    recursive = 0
    if args and args[0] == '-r':
        del args[0]
        recursive = 1
    try:
        if args:
            testall(args, recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
Tim Peters's avatar
Tim Peters committed
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
    import sys
    import os
    for filename in list:
        if os.path.isdir(filename):
            print filename + '/:',
            if recursive or toplevel:
                print 'recursing down:'
                import glob
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                print '*** directory (use -r) ***'
        else:
            print filename + ':',
            sys.stdout.flush()
            try:
                print what(filename)
            except IOError:
                print '*** not found ***'
Guido van Rossum's avatar
Guido van Rossum committed
226 227

# Run the small test program when executed as a script.
if __name__ == '__main__':
    test()