"""Stuff to parse WAVE files.

Usage.

Reading WAVE files:
      f = wave.open(file, 'r')
where file is either the name of a file or an open file pointer.
The open file pointer must have methods read(), seek(), and close().
When the setpos() and rewind() methods are not used, the seek()
method is not necessary.

This returns an instance of a class with the following public methods:
      getnchannels()  -- returns number of audio channels (1 for
                         mono, 2 for stereo)
      getsampwidth()  -- returns sample width in bytes
      getframerate()  -- returns sampling frequency
      getnframes()    -- returns number of audio frames
      getcomptype()   -- returns compression type ('NONE' for linear samples)
      getcompname()   -- returns human-readable version of
                         compression type ('not compressed' for linear samples)
      getparams()     -- returns a namedtuple consisting of all of the
                         above in the above order
      getmarkers()    -- returns None (for compatibility with the
                         aifc module)
      getmark(id)     -- raises an error since the mark does not
                         exist (for compatibility with the aifc module)
      readframes(n)   -- returns at most n frames of audio
      rewind()        -- rewind to the beginning of the audio stream
      setpos(pos)     -- seek to the specified position
      tell()          -- return the current position
      close()         -- close the instance (make it unusable)
The position returned by tell() and the position given to setpos()
are compatible and have nothing to do with the actual position in the
file.
The close() method is called automatically when the class instance
is destroyed.

Writing WAVE files:
      f = wave.open(file, 'w')
where file is either the name of a file or an open file pointer.
The open file pointer must have methods write(), tell(), seek(), and
close().

This returns an instance of a class with the following public methods:
      setnchannels(n) -- set the number of channels
      setsampwidth(n) -- set the sample width
      setframerate(n) -- set the frame rate
      setnframes(n)   -- set the number of frames
      setcomptype(type, name)
                      -- set the compression type and the
                         human-readable compression type
      setparams(tuple)
                      -- set all parameters at once
      tell()          -- return current position in output file
      writeframesraw(data)
                      -- write audio frames without patching up the
                         file header
      writeframes(data)
                      -- write audio frames and patch up the file header
      close()         -- patch up the file header and close the
                         output file
You should set the parameters before the first writeframesraw or
writeframes.  The total number of frames does not need to be set,
but when it is set to the correct value, the header does not have to
be patched up.
It is best to first set all parameters, optionally including the
compression type, and then write audio frames using writeframesraw.
When all frames have been written, either call writeframes(b'') or
close() to patch up the sizes in the header.
The close() method is called automatically when the class instance
is destroyed.
"""
# This module defines its own open() further down, so the classes reach
# the real file-opening function through the builtins module.
import builtins

__all__ = ["open", "openfp", "Error", "Wave_read", "Wave_write"]

class Error(Exception):
    """Raised for malformed WAVE data and for invalid use of this module."""

# Format tag stored in (and required from) the 'fmt ' chunk; it is the only
# format the reader accepts and the only one the writer emits.
WAVE_FORMAT_PCM = 0x0001

# Not referenced elsewhere in this file.  NOTE(review): presumably array
# typecodes indexed by sample width in bytes (None where no typecode
# applies) -- confirm before relying on it.
_array_fmts = None, 'b', 'h', None, 'i'

import audioop
import struct
import sys
from chunk import Chunk
from collections import namedtuple
import warnings

# Record type returned by the getparams() methods; the field order matches
# the tuple unpacked by Wave_write.setparams().
_wave_params = namedtuple('_wave_params',
                     'nchannels sampwidth framerate nframes comptype compname')

class Wave_read:
    """Reader for PCM WAVE (RIFF) audio files.

    Variables used in this class:

    These variables are available to the user through appropriate
    methods of this class:
    _file -- the RIFF Chunk wrapped around the open file (which needs
              methods read(), close(), and seek())
              set through the __init__() method, returned by getfp()
    _nchannels -- the number of audio channels
              available through the getnchannels() method
    _nframes -- the number of audio frames
              available through the getnframes() method
    _sampwidth -- the number of bytes per audio sample
              available through the getsampwidth() method
    _framerate -- the sampling frequency
              available through the getframerate() method
    _comptype -- the compression type (always 'NONE' in this class)
              available through the getcomptype() method
    _compname -- the human-readable compression type
              (always 'not compressed' in this class)
              available through the getcompname() method
    _soundpos -- the position in the audio stream, in frames
              available through the tell() method, set through the
              setpos() method

    These variables are used internally only:
    _fmt_chunk_read -- 1 iff the FMT chunk has been read
    _data_seek_needed -- 1 iff the DATA chunk has to be repositioned
              before the next readframes()
    _data_chunk -- instantiation of a chunk class for the DATA chunk
    _framesize -- size of one frame in the file
    _convert -- optional callable applied to the bytes returned by
              readframes(); this class only ever sets it to None
    _i_opened_the_file -- the file object opened by __init__() when it
              was given a name, otherwise None
    """

    def initfp(self, file):
        """Parse the RIFF/WAVE headers of *file* and locate the data chunk.

        Raises Error when the stream does not start with a RIFF/WAVE
        header, when the data chunk precedes the fmt chunk, or when either
        chunk is missing.  Errors raised by _read_fmt_chunk() propagate.
        """
        self._convert = None
        self._soundpos = 0
        self._file = Chunk(file, bigendian=0)
        if self._file.getname() != b'RIFF':
            raise Error('file does not start with RIFF id')
        if self._file.read(4) != b'WAVE':
            raise Error('not a WAVE file')
        self._fmt_chunk_read = 0
        self._data_chunk = None
        while 1:
            self._data_seek_needed = 1
            try:
                chunk = Chunk(self._file, bigendian=0)
            except EOFError:
                # No more sub-chunks in the RIFF container.
                break
            chunkname = chunk.getname()
            if chunkname == b'fmt ':
                self._read_fmt_chunk(chunk)
                self._fmt_chunk_read = 1
            elif chunkname == b'data':
                if not self._fmt_chunk_read:
                    raise Error('data chunk before fmt chunk')
                self._data_chunk = chunk
                self._nframes = chunk.chunksize // self._framesize
                # The chunk was just opened, so it is already positioned
                # at its first frame.
                self._data_seek_needed = 0
                break
            chunk.skip()
        if not self._fmt_chunk_read or not self._data_chunk:
            raise Error('fmt chunk and/or data chunk missing')

    def __init__(self, f):
        """Open *f* for reading.

        *f* is either a file name (str), which is opened in 'rb' mode and
        closed again by close(), or an already open file object.
        """
        self._i_opened_the_file = None
        if isinstance(f, str):
            f = builtins.open(f, 'rb')
            self._i_opened_the_file = f
        # else, assume it is an open file object already
        try:
            self.initfp(f)
        except:
            # Only close what this constructor opened; the exception is
            # always re-raised.
            if self._i_opened_the_file:
                f.close()
            raise

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    #
    # User visible methods.
    #
    def getfp(self):
        """Return the RIFF chunk object this reader reads from."""
        return self._file

    def rewind(self):
        """Move the read position back to the first frame."""
        self._data_seek_needed = 1
        self._soundpos = 0

    def close(self):
        """Drop the file reference; close the file if __init__ opened it."""
        self._file = None
        file = self._i_opened_the_file
        if file:
            self._i_opened_the_file = None
            file.close()

    def tell(self):
        """Return the current read position, in frames."""
        return self._soundpos

    def getnchannels(self):
        """Return the number of audio channels."""
        return self._nchannels

    def getnframes(self):
        """Return the number of audio frames in the data chunk."""
        return self._nframes

    def getsampwidth(self):
        """Return the sample width in bytes."""
        return self._sampwidth

    def getframerate(self):
        """Return the sampling frequency."""
        return self._framerate

    def getcomptype(self):
        """Return the compression type ('NONE')."""
        return self._comptype

    def getcompname(self):
        """Return the human-readable compression type ('not compressed')."""
        return self._compname

    def getparams(self):
        """Return all of the above as a _wave_params namedtuple."""
        return _wave_params(self.getnchannels(), self.getsampwidth(),
                       self.getframerate(), self.getnframes(),
                       self.getcomptype(), self.getcompname())

    def getmarkers(self):
        """Return None (for compatibility with the aifc module)."""
        return None

    def getmark(self, id):
        """Always raise Error (for compatibility with the aifc module)."""
        raise Error('no marks')

    def setpos(self, pos):
        """Set the read position to frame *pos*.

        Raises Error unless 0 <= pos <= getnframes().
        """
        if pos < 0 or pos > self._nframes:
            raise Error('position not in range')
        self._soundpos = pos
        self._data_seek_needed = 1

    def readframes(self, nframes):
        """Read and return at most *nframes* frames of audio as bytes."""
        if self._data_seek_needed:
            self._data_chunk.seek(0, 0)
            pos = self._soundpos * self._framesize
            if pos:
                self._data_chunk.seek(pos, 0)
            self._data_seek_needed = 0
        if nframes == 0:
            return b''
        data = self._data_chunk.read(nframes * self._framesize)
        # Multi-byte samples are byte-swapped on big-endian hosts.
        if self._sampwidth != 1 and sys.byteorder == 'big':
            data = audioop.byteswap(data, self._sampwidth)
        if self._convert and data:
            data = self._convert(data)
        self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth)
        return data

    #
    # Internal methods.
    #

    def _read_fmt_chunk(self, chunk):
        """Decode the 'fmt ' chunk into the reader's audio parameters.

        Raises EOFError when the chunk is too short, and Error for a
        format tag other than WAVE_FORMAT_PCM, a zero sample width, or
        zero channels.
        """
        try:
            (wFormatTag, self._nchannels, self._framerate,
             dwAvgBytesPerSec, wBlockAlign) = struct.unpack_from(
                '<HHLLH', chunk.read(14))
        except struct.error:
            raise EOFError from None
        if wFormatTag == WAVE_FORMAT_PCM:
            try:
                sampwidth = struct.unpack_from('<H', chunk.read(2))[0]
            except struct.error:
                raise EOFError from None
            # The header stores bits per sample; round up to whole bytes.
            self._sampwidth = (sampwidth + 7) // 8
            if not self._sampwidth:
                raise Error('bad sample width')
        else:
            raise Error('unknown format: %r' % (wFormatTag,))
        if not self._nchannels:
            raise Error('bad # of channels')
        self._framesize = self._nchannels * self._sampwidth
        self._comptype = 'NONE'
        self._compname = 'not compressed'

class Wave_write:
    """Writer for PCM WAVE (RIFF) audio files.

    Variables used in this class:

    These variables are user settable through appropriate methods
    of this class:
    _file -- the open file with methods write(), close(), tell(), seek()
              set through the __init__() method
    _comptype -- the compression type (only 'NONE' is accepted)
              set through the setcomptype() or setparams() method
    _compname -- the human-readable compression type
              set through the setcomptype() or setparams() method
    _nchannels -- the number of audio channels
              set through the setnchannels() or setparams() method
    _sampwidth -- the number of bytes per audio sample
              set through the setsampwidth() or setparams() method
    _framerate -- the sampling frequency
              set through the setframerate() or setparams() method
    _nframes -- the number of audio frames written to the header
              set through the setnframes() or setparams() method

    These variables are used internally only:
    _datalength -- the size of the audio samples written to the header
    _nframeswritten -- the number of frames actually written
    _datawritten -- the size of the audio samples actually written
    _headerwritten -- True once the RIFF header has been emitted
    _convert -- optional callable applied to data before it is written;
              this class only ever sets it to None
    _i_opened_the_file -- the file object opened by __init__() when it
              was given a name, otherwise None
    """

    # Class-level default: close() is also called from __del__, which runs
    # even when __init__ raised before initfp() could assign _file.
    _file = None

    def __init__(self, f):
        """Open *f* for writing.

        *f* is either a file name (str), which is opened in 'wb' mode and
        closed again by close(), or an already open file object.
        """
        self._i_opened_the_file = None
        if isinstance(f, str):
            f = builtins.open(f, 'wb')
            self._i_opened_the_file = f
        try:
            self.initfp(f)
        except:
            # Only close what this constructor opened; the exception is
            # always re-raised.
            if self._i_opened_the_file:
                f.close()
            raise

    def initfp(self, file):
        """Attach *file* and reset all parameters and counters."""
        self._file = file
        self._convert = None
        self._nchannels = 0
        self._sampwidth = 0
        self._framerate = 0
        self._nframes = 0
        self._nframeswritten = 0
        self._datawritten = 0
        self._datalength = 0
        self._headerwritten = False
        # Defaults so getcomptype()/getcompname()/getparams() work even if
        # setcomptype() is never called; 'NONE' is the only supported type.
        self._comptype = 'NONE'
        self._compname = 'not compressed'

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    #
    # User visible methods.
    #
    def setnchannels(self, nchannels):
        """Set the number of channels; must be >= 1 and precede any write."""
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        if nchannels < 1:
            raise Error('bad # of channels')
        self._nchannels = nchannels

    def getnchannels(self):
        """Return the number of channels; raise Error if not yet set."""
        if not self._nchannels:
            raise Error('number of channels not set')
        return self._nchannels

    def setsampwidth(self, sampwidth):
        """Set the sample width in bytes (1 to 4); must precede any write."""
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        if sampwidth < 1 or sampwidth > 4:
            raise Error('bad sample width')
        self._sampwidth = sampwidth

    def getsampwidth(self):
        """Return the sample width in bytes; raise Error if not yet set."""
        if not self._sampwidth:
            raise Error('sample width not set')
        return self._sampwidth

    def setframerate(self, framerate):
        """Set the frame rate, rounded to the nearest integer.

        Raises Error if data has already been written or if the rounded
        rate is not positive.
        """
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        # Validate the value that is actually stored: a small positive
        # fraction such as 0.3 rounds to 0, which means "not set".
        rounded_framerate = int(round(framerate))
        if rounded_framerate <= 0:
            raise Error('bad frame rate')
        self._framerate = rounded_framerate

    def getframerate(self):
        """Return the frame rate; raise Error if not yet set."""
        if not self._framerate:
            raise Error('frame rate not set')
        return self._framerate

    def setnframes(self, nframes):
        """Set the frame count to put in the header; must precede any write."""
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        self._nframes = nframes

    def getnframes(self):
        """Return the number of frames written so far."""
        return self._nframeswritten

    def setcomptype(self, comptype, compname):
        """Set the compression type; only 'NONE' is supported."""
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        if comptype not in ('NONE',):
            raise Error('unsupported compression type')
        self._comptype = comptype
        self._compname = compname

    def getcomptype(self):
        """Return the compression type."""
        return self._comptype

    def getcompname(self):
        """Return the human-readable compression type."""
        return self._compname

    def setparams(self, params):
        """Set all parameters at once.

        *params* unpacks as (nchannels, sampwidth, framerate, nframes,
        comptype, compname).
        """
        nchannels, sampwidth, framerate, nframes, comptype, compname = params
        if self._datawritten:
            raise Error('cannot change parameters after starting to write')
        self.setnchannels(nchannels)
        self.setsampwidth(sampwidth)
        self.setframerate(framerate)
        self.setnframes(nframes)
        self.setcomptype(comptype, compname)

    def getparams(self):
        """Return the current parameters as a _wave_params namedtuple.

        The nframes field is the header frame count (_nframes), not the
        number of frames written.  Raises Error while the channel count,
        sample width or frame rate is still unset.
        """
        if not self._nchannels or not self._sampwidth or not self._framerate:
            raise Error('not all parameters set')
        return _wave_params(self._nchannels, self._sampwidth, self._framerate,
              self._nframes, self._comptype, self._compname)

    def setmark(self, id, pos, name):
        """Always raise Error; marks are not supported."""
        raise Error('setmark() not supported')

    def getmark(self, id):
        """Always raise Error; there are no marks."""
        raise Error('no marks')

    def getmarkers(self):
        """Return None; there are no marks."""
        return None

    def tell(self):
        """Return the number of frames written so far."""
        return self._nframeswritten

    def writeframesraw(self, data):
        """Write audio frames without patching up the file header.

        The header itself is written first if it has not been yet.
        """
        if not isinstance(data, (bytes, bytearray)):
            # View any other buffer object as a flat sequence of bytes so
            # that len() counts bytes.
            data = memoryview(data).cast('B')
        self._ensure_header_written(len(data))
        nframes = len(data) // (self._sampwidth * self._nchannels)
        if self._convert:
            data = self._convert(data)
        # Multi-byte samples are byte-swapped on big-endian hosts.
        if self._sampwidth != 1 and sys.byteorder == 'big':
            data = audioop.byteswap(data, self._sampwidth)
        self._file.write(data)
        self._datawritten += len(data)
        self._nframeswritten = self._nframeswritten + nframes

    def writeframes(self, data):
        """Write audio frames, then patch the header sizes if they differ."""
        self.writeframesraw(data)
        if self._datalength != self._datawritten:
            self._patchheader()

    def close(self):
        """Finish the header, flush, and close the file if __init__ opened it."""
        try:
            if self._file:
                self._ensure_header_written(0)
                if self._datalength != self._datawritten:
                    self._patchheader()
                self._file.flush()
        finally:
            # Runs even when header writing failed, so the file this
            # object opened is never left open.
            self._file = None
            file = self._i_opened_the_file
            if file:
                self._i_opened_the_file = None
                file.close()

    #
    # Internal methods.
    #

    def _ensure_header_written(self, datasize):
        """Write the header once; raise Error if a parameter is unset."""
        if not self._headerwritten:
            if not self._nchannels:
                raise Error('# channels not specified')
            if not self._sampwidth:
                raise Error('sample width not specified')
            if not self._framerate:
                raise Error('sampling rate not specified')
            self._write_header(datasize)

    def _write_header(self, initlength):
        """Emit the RIFF/WAVE/fmt/data header.

        When no frame count was set, it is derived from *initlength*
        (a size in bytes).  The positions of the two size fields are
        remembered for _patchheader() when the file supports tell().
        """
        assert not self._headerwritten
        self._file.write(b'RIFF')
        if not self._nframes:
            self._nframes = initlength // (self._nchannels * self._sampwidth)
        self._datalength = self._nframes * self._nchannels * self._sampwidth
        try:
            self._form_length_pos = self._file.tell()
        except (AttributeError, OSError):
            # No usable tell(): remember that no position was recorded.
            self._form_length_pos = None
        self._file.write(struct.pack('<L4s4sLHHLLHH4s',
            36 + self._datalength, b'WAVE', b'fmt ', 16,
            WAVE_FORMAT_PCM, self._nchannels, self._framerate,
            self._nchannels * self._framerate * self._sampwidth,
            self._nchannels * self._sampwidth,
            self._sampwidth * 8, b'data'))
        if self._form_length_pos is not None:
            self._data_length_pos = self._file.tell()
        self._file.write(struct.pack('<L', self._datalength))
        self._headerwritten = True

    def _patchheader(self):
        """Rewrite both size fields of the header to match the data written.

        Relies on the positions recorded by _write_header(); the file
        position is restored afterwards.
        """
        assert self._headerwritten
        if self._datawritten == self._datalength:
            return
        curpos = self._file.tell()
        self._file.seek(self._form_length_pos, 0)
        self._file.write(struct.pack('<L', 36 + self._datawritten))
        self._file.seek(self._data_length_pos, 0)
        self._file.write(struct.pack('<L', self._datawritten))
        self._file.seek(curpos, 0)
        self._datalength = self._datawritten

def open(f, mode=None):
    """Return a Wave_read or Wave_write object for *f*.

    *f* is a file name or an open file object.  When *mode* is None it is
    taken from f.mode if *f* has that attribute, and defaults to 'rb'
    otherwise.  'r'/'rb' select reading and 'w'/'wb' select writing; any
    other mode raises Error.
    """
    if mode is None:
        if hasattr(f, 'mode'):
            mode = f.mode
        else:
            mode = 'rb'
    if mode in ('r', 'rb'):
        return Wave_read(f)
    elif mode in ('w', 'wb'):
        return Wave_write(f)
    else:
        raise Error("mode must be 'r', 'rb', 'w', or 'wb'")

def openfp(f, mode=None):
    """Deprecated alias of open(); issues a DeprecationWarning first."""
    warnings.warn("wave.openfp is deprecated since Python 3.7. "
                  "Use wave.open instead.", DeprecationWarning, stacklevel=2)
    return open(f, mode=mode)