# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
"""Common pathname manipulations, WindowsNT/95 version.

Instead of importing this module directly, import os and refer to this
module as os.path.
"""

import os
9
import sys
10
import stat
11
import genericpath
12
from genericpath import *
13

14 15
# Names exported by ``from ntpath import *``.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime", "islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
           "extsep","devnull","realpath","supports_unicode_filenames","relpath",
           "samefile", "sameopenfile", "samestat", "commonpath"]
21

# strings representing various path-related bits and pieces
# These are primarily for export; internally, they are hardcoded.
curdir = '.'    # component that normpath() drops as "current directory"
pardir = '..'   # component that normpath() resolves as "parent directory"
extsep = '.'
sep = '\\'      # primary separator; functions below emit this one
pathsep = ';'
altsep = '/'    # alternative separator, accepted on input
defpath = '.;C:\\bin'
devnull = 'nul'
32

33 34 35 36 37 38
def _get_bothseps(path):
    """Return the two separator characters, typed to match *path*.

    The result is a bytes object when *path* is bytes and a str otherwise.
    """
    return b'\\/' if isinstance(path, bytes) else '\\/'

# Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done
# (this is done by normpath).

def normcase(s):
    """Normalize case of pathname.

    Makes all characters lowercase and all slashes into backslashes.

    The argument is first converted with os.fspath(); the result has the
    same type (str or bytes) as the converted value.  A TypeError is raised
    if the converted value is neither str nor bytes.
    """
    s = os.fspath(s)
    try:
        if isinstance(s, bytes):
            return s.replace(b'/', b'\\').lower()
        else:
            return s.replace('/', '\\').lower()
    except (TypeError, AttributeError):
        # Replace the low-level error with a clearer message when the
        # argument has the wrong type; otherwise let the original propagate.
        if not isinstance(s, (bytes, str)):
            raise TypeError("normcase() argument must be str or bytes, "
                            "not %r" % s.__class__.__name__) from None
        raise
58

59

# Return whether a path is absolute.
# Trivial in Posix, harder on Windows.
# For Windows it is absolute if it starts with a slash or backslash (current
# volume), or if a pathname after the volume-letter-and-colon or UNC-resource
# starts with a slash or backslash.

def isabs(s):
    """Test whether a path is absolute.

    The drive or UNC share is split off first; the path is absolute when
    what remains is non-empty and begins with a slash or backslash.
    """
    s = os.fspath(s)
    s = splitdrive(s)[1]
    return len(s) > 0 and s[0] in _get_bothseps(s)
71 72


# Join two (or more) paths.
def join(path, *paths):
    """Join *path* with each component in *paths* and return the result.

    Components are processed left to right:
      - a component whose path part starts with a separator replaces the
        path accumulated so far; its drive is adopted if it has one, or if
        no drive has been seen yet;
      - a component carrying a different drive (compared case-insensitively)
        replaces both the drive and the accumulated path;
      - any other component is appended, with a backslash inserted when the
        accumulated path does not already end in a separator.

    On TypeError, AttributeError or BytesWarning the arguments are passed to
    genericpath._check_arg_types() before the original error is re-raised.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep = b'\\'
        seps = b'\\/'
        colon = b':'
    else:
        sep = '\\'
        seps = '\\/'
        colon = ':'
    try:
        if not paths:
            path[:0] + sep  #23780: Ensure compatible data type even if p is null.
        result_drive, result_path = splitdrive(path)
        for p in map(os.fspath, paths):
            p_drive, p_path = splitdrive(p)
            if p_path and p_path[0] in seps:
                # Second path is absolute
                if p_drive or not result_drive:
                    result_drive = p_drive
                result_path = p_path
                continue
            elif p_drive and p_drive != result_drive:
                if p_drive.lower() != result_drive.lower():
                    # Different drives => ignore the first path entirely
                    result_drive = p_drive
                    result_path = p_path
                    continue
                # Same drive in different case
                result_drive = p_drive
            # Second path is relative to the first
            if result_path and result_path[-1] not in seps:
                result_path = result_path + sep
            result_path = result_path + p_path
        ## add separator between UNC and non-absolute path
        if (result_path and result_path[0] not in seps and
            result_drive and result_drive[-1:] != colon):
            return result_drive + sep + result_path
        return result_drive + result_path
    except (TypeError, AttributeError, BytesWarning):
        genericpath._check_arg_types('join', path, *paths)
        raise
116 117 118


# Split a path in a drive specification (a drive letter followed by a
# colon) and the path specification.
# It is always true that drivespec + pathspec == p
def splitdrive(p):
    """Split a pathname into drive/UNC sharepoint and relative path specifiers.
    Returns a 2-tuple (drive_or_unc, path); either part may be empty.

    If you assign
        result = splitdrive(p)
    It is always true that:
        result[0] + result[1] == p

    If the path contained a drive letter, drive_or_unc will contain everything
    up to and including the colon.  e.g. splitdrive("c:/dir") returns ("c:", "/dir")

    If the path contained a UNC path, the drive_or_unc will contain the host name
    and share up to but not including the fourth directory separator character.
    e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")

    Paths cannot contain both a drive letter and a UNC path.

    """
    p = os.fspath(p)
    if len(p) >= 2:
        if isinstance(p, bytes):
            sep = b'\\'
            altsep = b'/'
            colon = b':'
        else:
            sep = '\\'
            altsep = '/'
            colon = ':'
        # Search a copy with uniform separators; slices are taken from the
        # original so the caller's separators are preserved in the result.
        normp = p.replace(altsep, sep)
        if (normp[0:2] == sep*2) and (normp[2:3] != sep):
            # is a UNC path:
            # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
            # \\machine\mountpoint\directory\etc\...
            #           directory ^^^^^^^^^^^^^^^
            index = normp.find(sep, 2)
            if index == -1:
                # Host name only, no share: not treated as a UNC drive.
                return p[:0], p
            index2 = normp.find(sep, index + 1)
            # a UNC path can't have two slashes in a row
            # (after the initial two)
            if index2 == index + 1:
                return p[:0], p
            if index2 == -1:
                index2 = len(p)
            return p[:index2], p[index2:]
        if normp[1:2] == colon:
            return p[:2], p[2:]
    return p[:0], p
170 171 172 173


# Parse UNC paths
def splitunc(p):
    """Deprecated since Python 3.1.  Please use splitdrive() instead;
    it now handles UNC paths.

    Split a pathname into UNC mount point and relative path specifiers.

    Return a 2-tuple (unc, rest); either part may be empty.
    If unc is not empty, it has the form '//host/mount' (or similar
    using backslashes).  unc+rest is always the input path.
    Paths containing drive letters never have a UNC part.

    A DeprecationWarning is issued on every call.
    """
    import warnings
    warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead",
                  DeprecationWarning, 2)
    drive, path = splitdrive(p)
    if len(drive) == 2:
        # Drive letter present
        return p[:0], p
    return drive, path
192 193 194


# Split a path in head (everything up to the last '/') and tail (the
# rest).  After the trailing '/' is stripped, the invariant
# join(head, tail) == p holds.
# The resulting head won't end in '/' unless it is the root.

def split(p):
    """Split a pathname.

    Return tuple (head, tail) where tail is everything after the final slash.
    Either part may be empty.

    The drive (as returned by splitdrive()) is kept at the front of head.
    """
    p = os.fspath(p)
    seps = _get_bothseps(p)
    d, p = splitdrive(p)
    # set i to index beyond p's last slash
    i = len(p)
    while i and p[i-1] not in seps:
        i -= 1
    head, tail = p[:i], p[i:]  # now tail has no slashes
    # remove trailing slashes from head, unless it's all slashes
    head = head.rstrip(seps) or head
    return d + head, tail
215 216 217


# Split a path in root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    # Delegates to genericpath._splitext with this module's separators;
    # the docstring is copied from that helper right after the definition.
    p = os.fspath(p)
    if isinstance(p, bytes):
        return genericpath._splitext(p, b'\\', b'/', b'.')
    else:
        return genericpath._splitext(p, '\\', '/', '.')
splitext.__doc__ = genericpath._splitext.__doc__
229 230 231 232 233


# Return the tail (basename) part of a path.

def basename(p):
    """Returns the final component of a pathname.

    This is the second element of the pair returned by split(p).
    """
    return split(p)[1]
236 237 238 239 240


# Return the head (dirname) part of a path.

def dirname(p):
    """Returns the directory component of a pathname.

    This is the first element of the pair returned by split(p).
    """
    return split(p)[0]
243 244

# Is a path a symbolic link?
# This will always return false on systems where os.lstat doesn't exist.

def islink(path):
    """Test whether a path is a symbolic link.
    This will always return false for Windows prior to 6.0.

    Returns False as well when os.lstat() is unavailable, fails with
    OSError, or rejects the path with ValueError (for example a path
    containing an embedded null character).
    """
    try:
        st = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    return stat.S_ISLNK(st.st_mode)

# Being true for dangling symbolic links is also useful.

def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links.

    Returns False when os.lstat() fails with OSError or rejects the path
    with ValueError (for example a path containing an embedded null
    character).
    """
    try:
        os.lstat(path)
    except (OSError, ValueError):
        return False
    return True
266

267 268 269 270 271 272 273 274 275 276 277 278 279 280
# Is a path a mount point?
# Any drive letter root (eg c:\)
# Any share UNC (eg \\server\share)
# Any volume mounted on a filesystem folder
#
# No one method detects all three situations. Historically we've lexically
# detected drive letter roots and share UNCs. The canonical approach to
# detecting mounted volumes (querying the reparse tag) fails for the most
# common case: drive letter roots. The alternative which uses GetVolumePathName
# fails if the drive letter is the result of a SUBST.
# Optional helper from the 'nt' extension module.  When the import fails the
# name is bound to None so that ismount() can test for its availability.
try:
    from nt import _getvolumepathname
except ImportError:
    _getvolumepathname = None
281
def ismount(path):
    """Test whether a path is a mount point (a drive root, the root of a
    share, or a mounted volume)

    The path is made absolute first.  UNC share roots and drive roots are
    recognised lexically; anything else is compared against the result of
    _getvolumepathname() when that helper is available, and is reported as
    not a mount point otherwise.
    """
    path = os.fspath(path)
    seps = _get_bothseps(path)
    path = abspath(path)
    root, rest = splitdrive(path)
    if root and root[0] in seps:
        # UNC share: a mount point only at the share root itself.
        return (not rest) or (rest in seps)
    if rest in seps:
        # Root of a drive letter.
        return True

    if _getvolumepathname:
        return path.rstrip(seps) == _getvolumepathname(path).rstrip(seps)
    else:
        return False
297 298 299 300 301 302 303 304 305 306 307 308


# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructs.

    If user or $HOME is unknown, do nothing.

    The home directory is taken from the environment: HOME, then
    USERPROFILE, then HOMEDRIVE joined with HOMEPATH.  For '~user' the last
    component of that directory is replaced by the given user name.  Paths
    that do not start with '~' are returned unchanged.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        tilde = b'~'
    else:
        tilde = '~'
    if not path.startswith(tilde):
        return path

    # Find the end of the '~' or '~user' prefix (the first separator).
    seps = _get_bothseps(path)
    i, n = 1, len(path)
    while i < n and path[i] not in seps:
        i += 1

    if 'HOME' in os.environ:
        userhome = os.environ['HOME']
    elif 'USERPROFILE' in os.environ:
        userhome = os.environ['USERPROFILE']
    elif 'HOMEPATH' not in os.environ:
        return path
    else:
        drive = os.environ.get('HOMEDRIVE', '')
        userhome = join(drive, os.environ['HOMEPATH'])

    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)

    if i != 1: #~user
        userhome = join(dirname(userhome), path[1:i])

    return userhome + path[i:]
343 344 345 346


# Expand paths containing shell variable substitutions.
# The following rules apply:
#       - no expansion within single quotes
#       - '$$' is translated into '$'
#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
#       - ${varname} is accepted.
#       - $varname is accepted.
#       - %varname% is accepted.
#       - varnames can be made out of letters, digits and the characters '_-'
#         (though is not verified in the ${varname} and %varname% cases)
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Unknown variables are left unchanged.

    Text between single quotes is copied verbatim, '$$' becomes '$' and
    '%%' becomes '%'.  For bytes input the values come from os.environb
    when it exists, otherwise from os.environ via os.fsdecode/os.fsencode.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path and b'%' not in path:
            return path
        import string
        varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
        quote = b'\''
        percent = b'%'
        brace = b'{'
        rbrace = b'}'
        dollar = b'$'
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path and '%' not in path:
            return path
        import string
        varchars = string.ascii_letters + string.digits + '_-'
        quote = '\''
        percent = '%'
        brace = '{'
        rbrace = '}'
        dollar = '$'
        environ = os.environ

    def lookup(var, unexpanded):
        # Return the value of *var*, or *unexpanded* when it is not set.
        try:
            if environ is None:
                return os.fsencode(os.environ[os.fsdecode(var)])
            return environ[var]
        except KeyError:
            return unexpanded

    res = path[:0]
    index = 0
    pathlen = len(path)
    # NOTE: several branches below re-slice 'path' to the text that follows
    # the current token; 'index' and 'pathlen' then refer to that remainder.
    while index < pathlen:
        c = path[index:index+1]
        if c == quote:   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(c)
                res += c + path[:index + 1]
            except ValueError:
                # Unterminated quote: copy the rest verbatim.
                res += c + path
                index = pathlen - 1
        elif c == percent:  # variable or '%'
            if path[index + 1:index + 2] == percent:
                res += c
                index += 1
            else:
                path = path[index+1:]
                pathlen = len(path)
                try:
                    index = path.index(percent)
                except ValueError:
                    # No closing '%': copy the rest verbatim.
                    res += percent + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    res += lookup(var, percent + var + percent)
        elif c == dollar:  # variable or '$$'
            if path[index + 1:index + 2] == dollar:
                res += c
                index += 1
            elif path[index + 1:index + 2] == brace:
                path = path[index+2:]
                pathlen = len(path)
                try:
                    index = path.index(rbrace)
                except ValueError:
                    # No closing brace: copy the rest verbatim.
                    res += dollar + brace + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    res += lookup(var, dollar + brace + var + rbrace)
            else:
                # $var: the name extends over the following varchars.
                var = path[:0]
                index += 1
                c = path[index:index + 1]
                while c and c in varchars:
                    var += c
                    index += 1
                    c = path[index:index + 1]
                res += lookup(var, dollar + var)
                if c:
                    # Step back so the terminating character is processed
                    # by the next loop iteration.
                    index -= 1
        else:
            res += c
        index += 1
    return res
465 466


# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
# Previously, this function also truncated pathnames to 8+3 format,
# but as this module is called "ntpath", that's obviously wrong!

def normpath(path):
    """Normalize path, eliminating double slashes, etc.

    Forward slashes become backslashes, empty and '.' components are
    dropped, and '..' components are resolved against the preceding
    component where possible.  Paths starting with one of the two special
    four-character prefixes handled below are returned unchanged.  An
    empty result is returned as '.'.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep = b'\\'
        altsep = b'/'
        curdir = b'.'
        pardir = b'..'
        special_prefixes = (b'\\\\.\\', b'\\\\?\\')
    else:
        sep = '\\'
        altsep = '/'
        curdir = '.'
        pardir = '..'
        special_prefixes = ('\\\\.\\', '\\\\?\\')
    if path.startswith(special_prefixes):
        # in the case of paths with these prefixes:
        # \\.\ -> device names
        # \\?\ -> literal paths
        # do not do any normalization, but return the path unchanged
        return path
    path = path.replace(altsep, sep)
    prefix, path = splitdrive(path)

    # collapse initial backslashes
    if path.startswith(sep):
        prefix += sep
        path = path.lstrip(sep)

    comps = path.split(sep)
    i = 0
    while i < len(comps):
        if not comps[i] or comps[i] == curdir:
            del comps[i]
        elif comps[i] == pardir:
            if i > 0 and comps[i-1] != pardir:
                # 'name\..' cancels out.
                del comps[i-1:i+1]
                i -= 1
            elif i == 0 and prefix.endswith(sep):
                # '..' directly under the root is dropped.
                del comps[i]
            else:
                # Leading '..' of a relative path is kept.
                i += 1
        else:
            i += 1
    # If the path is now empty, substitute '.'
    if not prefix and not comps:
        comps.append(curdir)
    return prefix + sep.join(comps)
Guido van Rossum's avatar
Guido van Rossum committed
519 520 521


# Return an absolute path.
522 523 524
# abspath() has two implementations, selected by whether the 'nt' extension
# module provides _getfullpathname.
try:
    from nt import _getfullpathname

except ImportError: # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute version of a path.

        A path that is not absolute is joined onto the current working
        directory; the result is then normalized with normpath().
        """
        path = os.fspath(path)
        if not isabs(path):
            if isinstance(path, bytes):
                cwd = os.getcwdb()
            else:
                cwd = os.getcwd()
            path = join(cwd, path)
        return normpath(path)

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute version of a path.

        A non-empty path is resolved with _getfullpathname(); if that
        raises OSError the path is used as given.  An empty path yields the
        current working directory.  The result is normalized with
        normpath().
        """

        if path: # Empty path must return current working directory.
            path = os.fspath(path)
            try:
                path = _getfullpathname(path)
            except OSError:
                pass # Bad path - return unchanged.
        elif isinstance(path, bytes):
            path = os.getcwdb()
        else:
            path = os.getcwd()
        return normpath(path)
552 553 554

# realpath is a no-op on systems without islink support
realpath = abspath
# Win9x family and earlier have no Unicode filename support.
# False when sys.getwindowsversion() does not exist.
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                              sys.getwindowsversion()[3] >= 2)
558

559
def relpath(path, start=None):
    """Return a relative version of a path.

    The result leads from *start* (the current directory when omitted) to
    *path*.  Both are made absolute and normalized first; components are
    compared with normcase().

    Raises ValueError if *path* is empty or if the two paths are on
    different drives.  On TypeError, ValueError, AttributeError,
    BytesWarning or DeprecationWarning the arguments are passed to
    genericpath._check_arg_types() before the original error is re-raised.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep = b'\\'
        curdir = b'.'
        pardir = b'..'
    else:
        sep = '\\'
        curdir = '.'
        pardir = '..'

    if start is None:
        start = curdir

    if not path:
        raise ValueError("no path specified")

    start = os.fspath(start)
    try:
        start_abs = abspath(normpath(start))
        path_abs = abspath(normpath(path))
        start_drive, start_rest = splitdrive(start_abs)
        path_drive, path_rest = splitdrive(path_abs)
        if normcase(start_drive) != normcase(path_drive):
            raise ValueError("path is on mount %r, start on mount %r" % (
                path_drive, start_drive))

        start_list = [x for x in start_rest.split(sep) if x]
        path_list = [x for x in path_rest.split(sep) if x]
        # Work out how much of the filepath is shared by start and path.
        i = 0
        for e1, e2 in zip(start_list, path_list):
            if normcase(e1) != normcase(e2):
                break
            i += 1

        # Climb out of the unshared part of start, then descend into path.
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)
    except (TypeError, ValueError, AttributeError, BytesWarning, DeprecationWarning):
        genericpath._check_arg_types('relpath', path, start)
        raise
603 604


605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
# Return the longest common sub-path of the sequence of paths given as input.
# The function is case-insensitive and 'separator-insensitive', i.e. if the
# only difference between two paths is the use of '\' versus '/' as separator,
# they are deemed to be equal.
#
# However, the returned path will have the standard '\' separator (even if the
# given paths had the alternative '/' separator) and will have the case of the
# first path given in the sequence. Additionally, any trailing separator is
# stripped from the returned path.

def commonpath(paths):
    """Given a sequence of path names, returns the longest common sub-path.

    Comparison ignores case and treats '/' and backslash as the same
    separator.  The result uses backslashes and the case of the first path,
    without a trailing separator.

    Raises ValueError if *paths* is empty, if absolute and relative paths
    are mixed, or if the paths are not all on the same drive.  On TypeError
    or AttributeError the arguments are passed to
    genericpath._check_arg_types() before the original error is re-raised.
    """

    # Materialize before testing for emptiness so that an exhausted or empty
    # iterator is reported as ValueError rather than failing on paths[0].
    # ``paths or ()`` keeps the ValueError for any falsy argument.
    paths = tuple(map(os.fspath, paths or ()))
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'\\'
        altsep = b'/'
        curdir = b'.'
    else:
        sep = '\\'
        altsep = '/'
        curdir = '.'

    try:
        drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths]
        split_paths = [p.split(sep) for d, p in drivesplits]

        try:
            # Unpacking a one-element set fails when the flags disagree.
            is_absolute, = set(p[:1] == sep for d, p in drivesplits)
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        # Check that all drive letters or UNC paths match. The check is made only
        # now otherwise type errors for mixing strings and bytes would not be
        # caught.
        if len(set(d for d, p in drivesplits)) != 1:
            raise ValueError("Paths don't have the same drive")

        # The returned components come from the first path, in its own case.
        drive, path = splitdrive(paths[0].replace(altsep, sep))
        common = path.split(sep)
        common = [c for c in common if c and c != curdir]

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The common prefix of all lists equals that of the smallest and
        # largest list in sort order.
        s1 = min(split_paths)
        s2 = max(split_paths)
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = common[:i]
                break
        else:
            common = common[:len(s1)]

        prefix = drive + sep if is_absolute else drive
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        genericpath._check_arg_types('commonpath', *paths)
        raise


667
# determine if two files are in fact the same file
668
try:
    # GetFinalPathNameByHandle is available starting with Windows 6.0.
    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
    if sys.getwindowsversion()[:2] >= (6, 0):
        from nt import _getfinalpathname
    else:
        raise ImportError
except (AttributeError, ImportError):
    # On Windows XP and earlier, two files are the same if their absolute
    # pathnames are the same.
    # Non-Windows operating systems fake this method with an XP
    # approximation.
    def _getfinalpathname(f):
        """Fallback: return the case-normalized absolute form of *f*."""
        return normcase(abspath(f))
682

683 684 685 686 687 688

try:
    # The genericpath.isdir implementation uses os.stat and checks the mode
    # attribute to tell whether or not the path is a directory.
    # This is overkill on Windows - just pass the path to GetFileAttributes
    # and check the attribute from there.
    from nt import _isdir as isdir
except ImportError:
    # Use genericpath.isdir as imported above.
    pass