shutil.py 24.8 KB
Newer Older
1
"""Utility functions for copying and archiving files and directory trees.
2

3
XXX The functions here don't copy the resource fork or other metadata on Mac.
4 5

"""
Guido van Rossum's avatar
Guido van Rossum committed
6

Guido van Rossum's avatar
Guido van Rossum committed
7
import os
8
import sys
9
import stat
10
from os.path import abspath
Georg Brandl's avatar
Georg Brandl committed
11
import fnmatch
12
import collections
13
import errno
14
import tarfile
15

16 17 18 19 20 21
try:
    import bz2
    _BZ2_SUPPORTED = True
except ImportError:
    _BZ2_SUPPORTED = False

22 23 24 25 26 27 28 29 30
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossum's avatar
Guido van Rossum committed
31

32 33 34
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
35 36
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
37
           "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
38

39
class Error(EnvironmentError):
    """Raised by copyfile(), copytree() and move() when an operation fails."""
Guido van Rossum's avatar
Guido van Rossum committed
41

42 43 44 45
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe).

    copyfile() raises it when either path is a FIFO.
    """

46 47 48
class ExecError(EnvironmentError):
    """Raised when a command could not be executed (e.g. the external
    'zip' utility used as a fallback when creating ZIP archives)."""

49 50 51 52 53 54 55 56
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read.

    Unpack callables raise it when they are unable to handle an archive.
    """

class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails"""


Georg Brandl's avatar
Georg Brandl committed
57 58 59 60 61
# WindowsError is not defined on every platform.  Bind the name to None when
# it is missing so that later code can guard its isinstance() checks with
# "WindowsError is not None".
try:
    WindowsError
except NameError:
    WindowsError = None

62 63 64 65 66 67 68 69
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is moved in pieces obtained from fsrc.read(length); copying stops
    at the first empty read.
    """
    while True:
        chunk = fsrc.read(length)
        if not chunk:
            return
        fdst.write(chunk)

70 71
def _samefile(src, dst):
    # Macintosh, Unix.
72
    if hasattr(os.path, 'samefile'):
73 74 75 76
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False
77 78 79 80

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))
Tim Peters's avatar
Tim Peters committed
81

Guido van Rossum's avatar
Guido van Rossum committed
82
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error if src and dst are the same file, and SpecialFileError if
    either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            info = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossum's avatar
Guido van Rossum committed
101 102

def copymode(src, dst):
    """Copy the mode bits of src onto dst.

    Does nothing when os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
Guido van Rossum's avatar
Guido van Rossum committed
108 109

def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is copied only when the corresponding os function exists.
    An OSError from os.chflags() is ignored when its errno is EOPNOTSUPP
    and re-raised otherwise.
    """
    src_stat = os.stat(src)
    if hasattr(os, 'utime'):
        os.utime(dst, (src_stat.st_atime, src_stat.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, stat.S_IMODE(src_stat.st_mode))
    if hasattr(os, 'chflags') and hasattr(src_stat, 'st_flags'):
        try:
            os.chflags(dst, src_stat.st_flags)
        except OSError as why:
            not_supported = (hasattr(errno, 'EOPNOTSUPP') and
                             why.errno == errno.EOPNOTSUPP)
            if not not_supported:
                raise
Guido van Rossum's avatar
Guido van Rossum committed
124 125

def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    If dst is an existing directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossum's avatar
Guido van Rossum committed
135 136

def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    If dst is an existing directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossum's avatar
Guido van Rossum committed
146

Georg Brandl's avatar
Georg Brandl committed
147 148
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.  The returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns

159 160
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    os.symlink(os.readlink(srcname), dstname)
                else:
                    # Ignore dangling symlink if the flag is on.  Test the
                    # link itself (exists() follows it) rather than the raw
                    # link target, which may be relative to the link's
                    # directory and not to the current working directory.
                    if (not os.path.exists(srcname)
                            and ignore_dangling_symlinks):
                        continue
                    # otherwise let the copy occur; the copy function will
                    # raise an error for a dangling link
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including ignore_dangling_symlinks,
                # so that subdirectories are treated like the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One (src, dst, message) entry, like every other error above.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
240

241
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block: re-raise the
            # exception that is currently being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    entries = []
    try:
        entries = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            child_mode = os.lstat(child).st_mode
        except os.error:
            # Cannot stat it: treat it as a non-directory and try to remove.
            child_mode = 0
        if stat.S_ISDIR(child_mode):
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
288

289 290 291 292 293 294

def _basename(path):
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    return os.path.basename(path.rstrip(os.path.sep))

295
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied
    to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() failed: fall back to copy + delete.
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
334

335
def _destinsrc(src, dst):
336 337 338 339 340 341 342
    src = abspath(src)
    dst = abspath(dst)
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372

def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None

def _get_uid(name):
    """Returns an uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None

def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module could be imported), or None.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted but not used by this function.  If 'dry_run' is
    true, nothing is written.  'logger', if given, receives progress
    messages through its info() method.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory component; the
    # archive then goes to the current directory and there is nothing to
    # create (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name

430
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning the external "zip"
    command.

    The command is run recursively ("-r"), quietly unless 'verbose' is true.
    'dry_run' is forwarded to distutils' spawn().  Raises ExecError if the
    command could not be run successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The filename must be interpolated into the message itself, not
        # applied to the exception object.
        raise ExecError(("unable to create zip file '%s': "
            "could neither import the 'zipfile' module nor "
            "find a standalone zip utility") % zip_filename)

def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

450
    The output zip file will be named 'base_name' + ".zip".  Uses either the
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
473
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            zip = zipfile.ZipFile(zip_filename, "w",
                                  compression=zipfile.ZIP_DEFLATED)

            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    if os.path.isfile(path):
                        zip.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)
            zip.close()

    return zip_filename

# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple: make_archive() calls `function`
# with the (name, value) pairs of `extra_args` as keyword arguments, and
# get_archive_formats() reports `description`.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [],"ZIP file")
    }

500 501 502 503
# The bzip2'ed tar format is only offered when the bz2 module was importable.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")

504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
def get_archive_formats():
    """Returns a sorted list of the registered archive formats.

    Each element of the returned sequence is a tuple (name, description)
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())

def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []

    def _is_pair(item):
        return isinstance(item, (tuple, list)) and len(item) == 2

    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    if not all(_is_pair(item) for item in extra_args):
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)

def unregister_archive_format(name):
    """Removes the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]

def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.  An unknown format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'logger', if given, receives progress messages.  'verbose' is accepted
    but not used by this function.
    """
    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # location after the chdir below.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617


def get_unpack_formats():
    """Returns a sorted list of the registered unpack formats.

    Each element of the returned sequence is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())

def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # first make sure no other unpacker is registered for this extension
    owners = {}
    for format_name, info in _UNPACK_FORMATS.items():
        owners.update((ext, format_name) for ext in info[0])

    for extension in extensions:
        if extension not in owners:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')


def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be
    used to unpack archives. The callable will receive archives to unpack.
    If it's unable to handle an archive, it needs to raise a ReadError
    exception.

    If provided, `extra_args` is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_unpack_formats() function.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)

def unregister_unpack_format(name):
    """Removes the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]

def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists"""
    dirname = os.path.dirname(path)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            name = member.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: creating the directory was all there is
                continue

            # regular file: read the member fully before creating the target
            data = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(data)
            del data
    finally:
        archive.close()

def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
    """
    try:
        tarobj = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    try:
        tarobj.extractall(extract_dir)
    finally:
        tarobj.close()

# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple: `extensions` are the
# filename suffixes used to detect the format, `function` is called with the
# archive name and the target directory plus the (name, value) pairs of
# `extra_args` as keyword arguments.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file")
    }

710 711 712 713
# The bzip2'ed tar format is only offered when the bz2 module was importable.
# NOTE(review): the '.bz2' suffix also matches plain bzip2 files that are not
# tar archives, and '.tbz2' is not recognized; consider '.tar.bz2'/'.tbz2' --
# confirm against callers before changing.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")

714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format one of whose
    extensions `filename` ends with, or None if there is no match."""
    for format_name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return format_name
    return None

def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    Raises ValueError if `format` is given but is not a registered format,
    and ReadError if `format` is omitted and no registered unpacker matches
    the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # format_info is (extensions, function, extra_args, description)
    func = format_info[1]
    func(filename, extract_dir, **dict(format_info[2]))