"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossum's avatar
Guido van Rossum committed
6

Guido van Rossum's avatar
Guido van Rossum committed
7
import os
8
import sys
9
import stat
10
from os.path import abspath
Georg Brandl's avatar
Georg Brandl committed
11
import fnmatch
12
import collections
13
import errno
14
import tarfile
15

16 17
try:
    import bz2
18
    del bz2
19 20 21 22
    _BZ2_SUPPORTED = True
except ImportError:
    _BZ2_SUPPORTED = False

23 24 25 26 27 28 29 30 31
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossum's avatar
Guido van Rossum committed
32

33 34 35
# Public API of this module.  get_terminal_size is defined further down in
# this file and is part of the public interface, so it is exported as well.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "get_terminal_size"]
           # disk_usage is added later, if available on the platform
41

42
class Error(EnvironmentError):
    """Generic error raised by the copy and move helpers in this module."""
Guido van Rossum's avatar
Guido van Rossum committed
44

45 46 47 48
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""

49 50 51
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""

52 53 54 55 56 57 58 59
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""

class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails"""


Georg Brandl's avatar
Georg Brandl committed
60 61 62 63 64
# WindowsError is only a builtin on Windows.  Bind the name to None elsewhere
# so later code can test "WindowsError is not None" before using it.
try:
    WindowsError
except NameError:
    WindowsError = None

65 66 67 68 69 70 71 72
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks: fsrc.read(length) is called repeatedly and
    each non-empty result is passed to fdst.write() until read() returns an
    empty (falsy) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)

73 74
def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where the platform provides it (an OSError from
    that call is reported as "not the same file"); otherwise the normalized
    absolute pathnames are compared.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is not None:
        # Macintosh, Unix.
        try:
            return samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    normalized_src = os.path.normcase(os.path.abspath(src))
    normalized_dst = os.path.normcase(os.path.abspath(dst))
    return normalized_src == normalized_dst
Tim Peters's avatar
Tim Peters committed
84

85 86 87 88 89 90 91
def copyfile(src, dst, symlinks=False):
    """Copy the data of the file src to the file dst.

    If the optional flag `symlinks` is set and `src` is a symbolic link, a
    new symlink is created at `dst` instead of copying the file it points to.

    Raises Error if src and dst are the same file, and SpecialFileError if
    either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)

def copymode(src, dst, symlinks=False):
    """Copy the permission bits from src to dst.

    If the optional flag `symlinks` is set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks. If `lchmod` isn't available
    (eg. Linux), in these cases, this function does nothing.
    """
    if symlinks and os.path.islink(src) and os.path.islink(dst):
        if not hasattr(os, 'lchmod'):
            return
        stat_func, chmod_func = os.lstat, os.lchmod
    else:
        if not hasattr(os, 'chmod'):
            return
        stat_func, chmod_func = os.stat, os.chmod

    source_mode = stat_func(src).st_mode
    chmod_func(dst, stat.S_IMODE(source_mode))

def copystat(src, dst, symlinks=False):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    If the optional flag `symlinks` is set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.

    Any of the underlying os functions that the platform lacks is silently
    replaced by a no-op.
    """
    def _nop(*args):
        pass

    def _os_func(name):
        # Fall back to a no-op when the platform lacks the function.
        return getattr(os, name, _nop)

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        stat_func = os.lstat
        utime_func = _os_func('lutimes')
        chmod_func = _os_func('lchmod')
        chflags_func = _os_func('lchflags')
    else:
        stat_func = os.stat
        utime_func = _os_func('utime')
        chmod_func = _os_func('chmod')
        chflags_func = _os_func('chflags')

    st = stat_func(src)
    utime_func(dst, (st.st_atime, st.st_mtime))
    chmod_func(dst, stat.S_IMODE(st.st_mode))
    if not hasattr(st, 'st_flags'):
        return
    try:
        chflags_func(dst, st.st_flags)
    except OSError as why:
        # Only "operation not supported" is tolerated; anything else is
        # re-raised.
        unsupported = getattr(errno, 'EOPNOTSUPP', None)
        if unsupported is None or why.errno != unsupported:
            raise
Guido van Rossum's avatar
Guido van Rossum committed
166

167
def copy(src, dst, symlinks=False):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory, in which case the file is created
    inside it under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
Guido van Rossum's avatar
Guido van Rossum committed
180

181
def copy2(src, dst, symlinks=False):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory, in which case the file is created
    inside it under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
Guido van Rossum's avatar
Guido van Rossum committed
194

Georg Brandl's avatar
Georg Brandl committed
195 196
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.  The returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns

207 208
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    # Each entry is a (srcname, dstname, reason) tuple.
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # ignore dangling symlink if the flag is on
                    if not os.path.exists(linkto) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including ignore_dangling_symlinks,
                # so nested directories are treated like the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append (not extend): the error list holds one tuple per failure
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
292

293
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block, so the bare raise
            # re-raises the error currently being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # An entry that cannot be stat'ed is treated as a plain file.
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
340

341 342 343 344 345 346

def _basename(path):
    """Return the last component of path, ignoring trailing separators.

    Unlike os.path.basename(), this yields the final component even when
    the path ends with os.path.sep (as directory paths often do).
    """
    trimmed = path.rstrip(os.path.sep)
    return os.path.basename(trimmed)

347
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() failed; fall back to copy-then-delete.
        if os.path.islink(src):
            link_target = os.readlink(src)
            os.symlink(link_target, target)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, target, symlinks=True)
            rmtree(src)
        else:
            copy2(src, target)
            os.unlink(src)

394
def _destinsrc(src, dst):
    """Return True if dst is src itself or is located inside src.

    Both paths are made absolute and given a trailing separator before the
    prefix comparison, so "/a/bc" is not mistaken for a child of "/a/b".
    """
    separator = os.path.sep

    def _as_directory(path):
        absolute = abspath(path)
        return absolute if absolute.endswith(separator) else absolute + separator

    source_dir = _as_directory(src)
    destination_dir = _as_directory(dst)
    return destination_dir.startswith(source_dir)
402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431

def _get_gid(name):
    """Returns a gid, given a group name.

    Returns None when getgrnam is unavailable, when name is None, or when
    the group is unknown.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]

def _get_uid(name):
    """Returns an uid, given a user name.

    Returns None when getpwnam is unavailable, when name is None, or when
    the user is unknown.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]

def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    If 'dry_run' is true, nothing is written (neither the target directory
    nor the archive); 'logger', if given, receives info() messages.
    'verbose' is accepted but not used by this function.

    Raises ValueError for an unknown or unsupported 'compress' value.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory part; the archive
    # then goes into the current directory and there is nothing to create
    # (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name

489
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning an external "zip".

    The command is run recursively ("-r"), and quietly ("-rq") unless
    'verbose' is true.  'dry_run' is forwarded to distutils' spawn().

    Raises ExecError if spawn() reports a DistutilsExecError.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message is formatted *before* the exception is built; applying
        # "%" to the exception instance would raise TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
            "could neither import the 'zipfile' module nor "
            "find a standalone zip utility") % zip_filename)

def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.

    If 'dry_run' is true, no directory or archive is written.  'logger', if
    given, receives info() messages about what is being done.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # archive_dir is '' when base_name has no directory part; nothing needs
    # to be created then (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
        return zip_filename

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if not dry_run:
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(zip_filename, "w",
                             compression=zipfile.ZIP_DEFLATED) as zf:
            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    if os.path.isfile(path):
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)

    return zip_filename

# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = dict(
    gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    tar=(_make_tarball, [('compress', None)], "uncompressed tar file"),
    zip=(_make_zipfile, [], "ZIP file"),
)

# bzip2 support is only registered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")

563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
def get_archive_formats():
    """Returns a sorted list of the registered archive formats.

    Each element of the returned sequence is a tuple (name, description)
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())

def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, or if extra_args is not a
    tuple/list of 2-element tuples/lists.
    """
    args = [] if extra_args is None else extra_args
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    malformed = any(not isinstance(item, (tuple, list)) or len(item) != 2
                    for item in args)
    if malformed:
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, args, description)

def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]

def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format.
    """
    save_cwd = os.getcwd()

    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original cwd before leaving it.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676


def get_unpack_formats():
    """Returns a sorted list of the registered unpack formats.

    Each element of the returned sequence is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())

def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    """
    # first make sure no other unpacker is registered for this extension
    owner_by_extension = {ext: name
                          for name, info in _UNPACK_FORMATS.items()
                          for ext in info[0]}

    for extension in extensions:
        if extension in owner_by_extension:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       owner_by_extension[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')


def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be
    used to unpack archives. The callable will receive archives to unpack.
    If it's unable to handle an archive, it needs to raise a ReadError
    exception.

    If provided, `extra_args` is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_unpack_formats() function.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)

def unregister_unpack_format(name):
    """Removes the unpack format from the registry."""
    del _UNPACK_FORMATS[name]

def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists"""
    parent = os.path.dirname(path)
    if os.path.isdir(parent):
        return
    os.makedirs(parent)

def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`

    Members whose name is absolute or contains ".." are skipped.  Raises
    ReadError if the zipfile module is unavailable or `filename` is not a
    zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: only its parent needs to exist
                continue

            # file
            payload = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(payload)
    finally:
        archive.close()

def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`

    Raises ReadError if tarfile cannot open `filename`.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        message = "%s is not a compressed or uncompressed tar file" % filename
        raise ReadError(message)
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()

# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = dict(
    gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    tar=(['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    zip=(['.zip'], _unpack_zipfile, [], "ZIP file"),
)

# bzip2 support is only registered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")

773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None

def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    Raises ValueError if an explicitly given `format` is not registered, and
    ReadError if `format` is omitted and no registered unpacker matches the
    filename extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))
    else:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]

    # format_info is (extensions, function, extra_args, description);
    # extra_args is a sequence of (name, value) pairs for the unpacker.
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)
815

816 817 818 819 820

if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        vfs = os.statvfs(path)
        fragment_size = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * fragment_size,
            used=(vfs.f_blocks - vfs.f_bfree) * fragment_size,
            free=vfs.f_bavail * fragment_size)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
849

850

851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that case,
    they are converted to their respective uid/gid.

    Raises ValueError if neither user nor group is given, and LookupError if
    a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    # user can either be an int (the uid) or a string (the system username)
    elif isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923

def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None (or the os function is
            # missing); ValueError: stdout is closed or detached;
            # OSError: stdout is not a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))