# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
"""Common pathname manipulations, WindowsNT/95 version.

Instead of importing this module directly, import os and refer to this
module as os.path.
"""

import os
9
import sys
10
import stat
11
import genericpath
12
from genericpath import *
13

14 15
# Public names exported by ``from ntpath import *``.
__all__ = [
    # pure string manipulation
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    # file system queries
    "getsize", "getmtime", "getatime", "getctime",
    "islink", "exists", "lexists", "isdir", "isfile", "ismount",
    # expansion and normalization
    "expanduser", "expandvars", "normpath", "abspath", "splitunc",
    # constants
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep",
    "extsep", "devnull",
    # miscellaneous
    "realpath", "supports_unicode_filenames", "relpath",
    "samefile", "sameopenfile",
]
21

22
# Strings representing various path-related bits and pieces.
# These are primarily for export; internally, they are hardcoded.
curdir = '.'
pardir = '..'
extsep = '.'
sep = '\\'
pathsep = ';'
altsep = '/'
# A builtin module named 'ce' selects a different default search path
# (presumably Windows CE -- TODO confirm).
defpath = '\\Windows' if 'ce' in sys.builtin_module_names else '.;C:\\bin'
devnull = 'nul'
34

35 36 37 38 39 40
def _get_empty(path):
    """Return an empty string of the same kind (bytes or str) as *path*."""
    return b'' if isinstance(path, bytes) else ''

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
def _get_sep(path):
    """Return the backslash separator as bytes or str, matching *path*."""
    return b'\\' if isinstance(path, bytes) else '\\'

def _get_altsep(path):
    """Return the forward-slash separator as bytes or str, matching *path*."""
    return b'/' if isinstance(path, bytes) else '/'

def _get_bothseps(path):
    """Return both separators ('\\' then '/') as bytes or str, matching *path*."""
    return b'\\/' if isinstance(path, bytes) else '\\/'

def _get_dot(path):
    """Return a single dot as bytes or str, matching *path*."""
    return b'.' if isinstance(path, bytes) else '.'

def _get_colon(path):
    """Return a colon as bytes or str, matching *path*."""
    return b':' if isinstance(path, bytes) else ':'

71 72 73 74 75 76
def _get_special(path):
    """Return the tuple of prefixes (``\\\\.\\`` and ``\\\\?\\``) as bytes or str.

    The element type matches *path*.
    """
    if not isinstance(path, bytes):
        return ('\\\\.\\', '\\\\?\\')
    return (b'\\\\.\\', b'\\\\?\\')

77 78
# Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done
79
# (this is done by normpath).
80

81
def normcase(s):
    """Normalize case of pathname.

    Return *s* with every forward slash turned into a backslash and all
    characters lowercased.  Raise TypeError if *s* is neither str nor bytes.
    """
    if isinstance(s, (bytes, str)):
        backslashed = s.replace(_get_altsep(s), _get_sep(s))
        return backslashed.lower()
    type_name = s.__class__.__name__
    raise TypeError("normcase() argument must be str or bytes, "
                    "not '{}'".format(type_name))
89

90

91
# Return whether a path is absolute.
92 93 94
# Trivial in Posix, harder on Windows.
# For Windows it is absolute if it starts with a slash or backslash (current
# volume), or if a pathname after the volume-letter-and-colon or UNC-resource
95
# starts with a slash or backslash.
96 97

def isabs(s):
    """Test whether a path is absolute.

    The drive or UNC part is ignored; the path is absolute when what
    follows it starts with a slash or backslash.
    """
    rest = splitdrive(s)[1]
    if not rest:
        return False
    return rest[:1] in _get_bothseps(rest)
101 102


103 104
# Join two (or more) paths.

105
def join(a, *p):
    """Join two or more pathname components, inserting "\\" as needed.
    If any component is an absolute path, all previous path components
    will be discarded."""
    sep = _get_sep(a)
    seps = _get_bothseps(a)
    colon = _get_colon(a)
    result = a
    for part in p:
        # First decide whether `part` replaces everything joined so far.
        if not result:
            replaces = True
        elif not isabs(part):
            replaces = False
        else:
            # An absolute component usually wipes out the result so far,
            # but it is more complicated if the result begins with a drive
            # or UNC prefix.  The two are concatenated (minus redundant
            # slashes) in these four cases:
            #     1. join('c:', '/a') == 'c:/a'
            #     2. join('//computer/share', '/a') == '//computer/share/a'
            #     3. join('c:/', '/a') == 'c:/a'
            #     4. join('//computer/share/', '/a') == '//computer/share/a'
            # But the new component wins in all of these cases:
            #     5. join('c:/a', '/b') == '/b'
            #     6. join('//computer/share/a', '/b') == '/b'
            #     7. join('c:', 'd:/') == 'd:/'
            #     8. join('c:', '//computer/share/') == '//computer/share/'
            #     9. join('//computer/share', 'd:/') == 'd:/'
            #    10. join('//computer/share', '//computer/share/') == '//computer/share/'
            #    11. join('c:/', 'd:/') == 'd:/'
            #    12. join('c:/', '//computer/share/') == '//computer/share/'
            #    13. join('//computer/share/', 'd:/') == 'd:/'
            #    14. join('//computer/share/', '//computer/share/') == '//computer/share/'
            part_prefix, part_rest = splitdrive(part)
            if part_prefix:
                # A component carrying its own prefix always wins.
                replaces = True
            else:
                # No prefix, yet isabs() was true, so the component must
                # start with a slash (checked here).
                assert(part_rest and part_rest[0] in seps)

                # The component wins if the result has a rest that is more
                # than a lone separator; otherwise the two are concatenated
                # (cases 1-4 above).
                result_rest = splitdrive(result)[1]
                replaces = result_rest and result_rest not in seps

        if replaces:
            result = part
            continue

        # Concatenate, making sure exactly one separator sits in between.
        assert len(result) > 0
        last = result[-1:]
        if last in seps:
            if part and part[:1] in seps:
                result += part[1:]
            else:
                result += part
        elif last == colon:
            result += part
        elif part:
            if part[:1] in seps:
                result += part
            else:
                result += sep + part
        else:
            # The result is not empty and does not end with a backslash,
            # but the component is empty; since, e.g., split('a/') produces
            # ('a', ''), it's best if join() adds a backslash in this case.
            result += sep

    return result
180 181 182


# Split a path into a drive specification (a drive letter followed by a
# colon) and the path specification.
# It is always true that drivespec + pathspec == p.
def splitdrive(p):
    """Split a pathname into drive/UNC sharepoint and relative path specifiers.
    Returns a 2-tuple (drive_or_unc, path); either part may be empty.

    If you assign
        result = splitdrive(p)
    It is always true that:
        result[0] + result[1] == p

    If the path contained a drive letter, drive_or_unc will contain everything
    up to and including the colon.  e.g. splitdrive("c:/dir") returns ("c:", "/dir")

    If the path contained a UNC path, the drive_or_unc will contain the host name
    and share up to but not including the fourth directory separator character.
    e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")

    Paths cannot contain both a drive letter and a UNC path.

    """
    no_drive = (_get_empty(p), p)
    if len(p) < 2:
        return no_drive

    sep = _get_sep(p)
    # Search a normalized copy so both slash styles are recognized, but
    # always slice the original so that the two parts add up to p.
    normp = normcase(p)
    if normp[0:2] == sep * 2 and normp[2:3] != sep:
        # is a UNC path:
        # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
        # \\machine\mountpoint\directory\etc\...
        #           directory ^^^^^^^^^^^^^^^
        host_end = normp.find(sep, 2)
        if host_end == -1:
            return no_drive
        share_end = normp.find(sep, host_end + 1)
        # a UNC path can't have two slashes in a row
        # (after the initial two)
        if share_end == host_end + 1:
            return no_drive
        if share_end == -1:
            share_end = len(p)
        return p[:share_end], p[share_end:]
    if normp[1:2] == _get_colon(p):
        return p[:2], p[2:]
    return no_drive
227 228 229 230


# Parse UNC paths
def splitunc(p):
    """Deprecated since Python 3.1.  Please use splitdrive() instead;
    it now handles UNC paths.

    Split a pathname into UNC mount point and relative path specifiers.

    Return a 2-tuple (unc, rest); either part may be empty.
    If unc is not empty, it has the form '//host/mount' (or similar
    using backslashes).  unc+rest is always the input path.
    Paths containing drive letters never have an UNC part.

    A DeprecationWarning is issued on every call and is attributed to the
    caller of splitunc().
    """
    import warnings
    # stacklevel=2 makes the warning point at the calling code rather than
    # at this module, so users can locate the deprecated call.
    warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead",
                  DeprecationWarning, 2)
    sep = _get_sep(p)
    if not p[1:2]:
        # Fewer than two characters: cannot start with two separators.
        return p[:0], p
    firstTwo = p[0:2]
    if normcase(firstTwo) == sep + sep:
        # is a UNC path:
        # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
        # \\machine\mountpoint\directories...
        #           directory ^^^^^^^^^^^^^^^
        # Search a normalized copy, but slice the original path.
        normp = normcase(p)
        index = normp.find(sep, 2)
        if index == -1:
            # No separator after the host name: not a complete UNC path.
            return (p[:0], p)
        index = normp.find(sep, index + 1)
        if index == -1:
            index = len(p)
        return p[:index], p[index:]
    return p[:0], p
263 264 265


# Split a path into head (everything up to the last '/') and tail (the
# rest).  After the trailing '/' is stripped, the invariant
# join(head, tail) == p holds.
# The resulting head won't end in '/' unless it is the root.

def split(p):
    """Split a pathname.

    Return tuple (head, tail) where tail is everything after the final slash.
    Either part may be empty."""
    seps = _get_bothseps(p)
    drive, rest = splitdrive(p)
    # Move `cut` to the index just past the last slash in `rest`.
    cut = len(rest)
    while cut and rest[cut - 1] not in seps:
        cut -= 1
    head, tail = rest[:cut], rest[cut:]  # tail contains no slashes
    # Drop trailing slashes from head, unless head consists only of slashes.
    head = head.rstrip(seps) or head
    return drive + head, tail
289 290 291


# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    # Delegate to the shared implementation, handing it this module's
    # separators and extension marker in the same type as p.
    sep = _get_sep(p)
    altsep = _get_altsep(p)
    extsep = _get_dot(p)
    return genericpath._splitext(p, sep, altsep, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
300 301 302 303 304


# Return the tail (basename) part of a path.

def basename(p):
    """Returns the final component of a pathname"""
    _head, tail = split(p)
    return tail
307 308 309 310 311


# Return the head (dirname) part of a path.

def dirname(p):
    """Returns the directory component of a pathname"""
    head, _tail = split(p)
    return head
314 315

# Is a path a symbolic link?
316
# This will always return false on systems where os.lstat doesn't exist.
317 318

def islink(path):
    """Test whether a path is a symbolic link.

    This will always return false for Windows prior to 6.0.
    Also returns False when os.lstat() is unavailable or fails for *path*.
    """
    try:
        mode = os.lstat(path).st_mode
    except (os.error, AttributeError):
        return False
    return stat.S_ISLNK(mode)

# Being true for dangling symbolic links is also useful.

def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links.

    Returns False when os.lstat() fails for *path*.
    """
    try:
        os.lstat(path)
    except os.error:
        # os.error is OSError, which already covers WindowsError where that
        # name exists.  Naming WindowsError explicitly raised NameError on
        # platforms that do not define it.
        return False
    return True
337

338 339
# Is a path a mount point?  Either a root (with or without drive letter)
# or an UNC path with at most a / or \ after the mount point.
340 341

def ismount(path):
    """Test whether a path is a mount point (defined as root of drive)"""
    seps = _get_bothseps(path)
    root, rest = splitdrive(path)
    # A prefix that itself starts with a slash is a UNC sharepoint; it
    # counts as a mount point even with nothing after it.
    is_unc = bool(root) and root[0] in seps
    if is_unc and not rest:
        return True
    return rest in seps
348 349 350 351 352 353 354 355 356 357 358 359


# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructs.

    If user or $HOME is unknown, do nothing."""
    tilde = b'~' if isinstance(path, bytes) else '~'
    if not path.startswith(tilde):
        return path

    # Find the end of the "~" or "~user" component.
    seps = _get_bothseps(path)
    end, length = 1, len(path)
    while end < length and path[end] not in seps:
        end += 1

    # Look the home directory up in HOME, then USERPROFILE, then
    # HOMEDRIVE + HOMEPATH.
    env = os.environ
    if 'HOME' in env:
        userhome = env['HOME']
    elif 'USERPROFILE' in env:
        userhome = env['USERPROFILE']
    elif 'HOMEPATH' in env:
        userhome = join(env.get('HOMEDRIVE', ''), env['HOMEPATH'])
    else:
        return path

    if isinstance(path, bytes):
        userhome = userhome.encode(sys.getfilesystemencoding())

    if end != 1:
        # "~user": substitute the user name for the last component of the
        # current user's home directory.
        userhome = join(dirname(userhome), path[1:end])

    return userhome + path[end:]
393 394 395 396


# Expand paths containing shell variable substitutions.
# The following rules apply:
#       - no expansion within single quotes
#       - '$$' is translated into '$'
#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
#       - ${varname} is accepted.
#       - $varname is accepted.
#       - %varname% is accepted.
#       - varnames can be made out of letters, digits and the characters '_-'
#         (though this is not verified in the ${varname} and %varname% cases)
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

Tim Peters's avatar
Tim Peters committed
408
def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Unknown variables are left unchanged.  Text inside single quotes is
    copied verbatim, '$$' yields '$' and '%%' yields '%'.

    *path* may be str or bytes; the result has the same type.  For bytes,
    variable names and values are converted using the filesystem encoding
    (sys.getfilesystemencoding()); a name that cannot be decoded is treated
    as an unknown variable.  A value that cannot be encoded raises
    UnicodeEncodeError.
    """
    is_bytes = isinstance(path, bytes)
    if is_bytes:
        if b'$' not in path and b'%' not in path:
            return path
        import string
        varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
        quote = b'\''
        percent = b'%'
        brace = b'{'
        rbrace = b'}'
        dollar = b'$'
        encoding = sys.getfilesystemencoding()
    else:
        if '$' not in path and '%' not in path:
            return path
        import string
        varchars = string.ascii_letters + string.digits + '_-'
        quote = '\''
        percent = '%'
        brace = '{'
        rbrace = '}'
        dollar = '$'
        encoding = None

    def lookup(name):
        # Return the value of environment variable `name` in the same type
        # as `path`, or None when the variable is unknown.
        if is_bytes:
            try:
                name = name.decode(encoding)
            except UnicodeDecodeError:
                return None
        if name not in os.environ:
            return None
        value = os.environ[name]
        if is_bytes:
            value = value.encode(encoding)
        return value

    res = path[:0]
    index = 0
    pathlen = len(path)
    # NOTE: several branches re-base `path` to the unread remainder and
    # reset `index` relative to it; the trailing `index += 1` then steps
    # past the construct's closing character.
    while index < pathlen:
        c = path[index:index + 1]
        if c == quote:   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(c)
            except ValueError:
                # Unterminated quote: keep the quote and the rest verbatim.
                res += c + path
                index = pathlen - 1
            else:
                res += c + path[:index + 1]
        elif c == percent:  # variable or '%'
            if path[index + 1:index + 2] == percent:
                res += c
                index += 1
            else:
                path = path[index + 1:]
                pathlen = len(path)
                try:
                    index = path.index(percent)
                except ValueError:
                    # No closing '%': keep the text as it is.
                    res += percent + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    value = lookup(var)
                    if value is None:
                        value = percent + var + percent
                    res += value
        elif c == dollar:  # variable or '$$'
            if path[index + 1:index + 2] == dollar:
                res += c
                index += 1
            elif path[index + 1:index + 2] == brace:
                path = path[index + 2:]
                pathlen = len(path)
                try:
                    index = path.index(rbrace)
                except ValueError:
                    # No closing '}': keep the text as it is.
                    res += dollar + brace + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    value = lookup(var)
                    if value is None:
                        value = dollar + brace + var + rbrace
                    res += value
            else:
                # $name: the name extends over the following run of
                # characters drawn from varchars (possibly none).
                index += 1
                start = index
                c = path[index:index + 1]
                while c and c in varchars:
                    index += 1
                    c = path[index:index + 1]
                var = path[start:index]
                value = lookup(var)
                if value is None:
                    value = dollar + var
                res += value
                if c:
                    # Re-examine the character that ended the name.
                    index -= 1
        else:
            res += c
        index += 1
    return res
519 520


521
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
522 523
# Previously, this function also truncated pathnames to 8+3 format,
# but as this module is called "ntpath", that's obviously wrong!
524 525

def normpath(path):
    """Normalize path, eliminating double slashes, etc."""
    sep = _get_sep(path)
    dot = _get_dot(path)
    dotdot = dot * 2
    if path.startswith(_get_special(path)):
        # in the case of paths with these prefixes:
        # \\.\ -> device names
        # \\?\ -> literal paths
        # do not do any normalization, but return the path unchanged
        return path
    path = path.replace(_get_altsep(path), sep)
    prefix, path = splitdrive(path)

    # Collapse any run of leading backslashes into one, kept on the prefix.
    if path.startswith(sep):
        prefix += sep
        path = path.lstrip(sep)

    rooted = prefix.endswith(sep)
    kept = []
    for comp in path.split(sep):
        if not comp or comp == dot:
            # Empty and '.' components carry no information.
            continue
        if comp != dotdot:
            kept.append(comp)
        elif kept and kept[-1] != dotdot:
            # '..' cancels the preceding real component.
            kept.pop()
        elif not kept and rooted:
            # '..' directly under the root stays at the root.
            continue
        else:
            # Leading '..' of a relative path must be preserved.
            kept.append(comp)

    # If the path is now empty, substitute '.'
    if not prefix and not kept:
        kept.append(dot)
    return prefix + sep.join(kept)
Guido van Rossum's avatar
Guido van Rossum committed
563 564 565


# Return an absolute path.
566 567 568 569 570 571 572
try:
    from nt import _getfullpathname

except ImportError: # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute version of a path."""
        if not isabs(path):
            # Anchor relative paths at the current directory, using the
            # getcwd variant that matches the type of path.
            cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
            path = join(cwd, path)
        return normpath(path)

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute version of a path."""
        if not path:
            # Empty path must return current working directory.
            path = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
        else:
            try:
                path = _getfullpathname(path)
            except WindowsError:
                pass # Bad path - return unchanged.
        return normpath(path)
594 595 596

# realpath is a no-op on systems without islink support
realpath = abspath

# Win9x family and earlier have no Unicode filename support.
# Element 3 of sys.getwindowsversion() must be at least 2; where that
# function does not exist the flag is False.
if hasattr(sys, "getwindowsversion"):
    supports_unicode_filenames = sys.getwindowsversion()[3] >= 2
else:
    supports_unicode_filenames = False
600 601 602

def relpath(path, start=curdir):
    """Return a relative version of a path

    The result leads from *start* (the current directory by default) to
    *path*.  Raise ValueError if *path* is empty or if the two paths are
    on different drives or UNC mounts.
    """
    sep = _get_sep(path)

    if start is curdir:
        # Default start: use a dot of the same type as path.
        start = _get_dot(path)

    if not path:
        raise ValueError("no path specified")

    start_abs = abspath(normpath(start))
    path_abs = abspath(normpath(path))
    start_drive, start_rest = splitdrive(start_abs)
    path_drive, path_rest = splitdrive(path_abs)
    if normcase(start_drive) != normcase(path_drive):
        error = "path is on mount '{0}', start on mount '{1}'".format(
            path_drive, start_drive)
        raise ValueError(error)

    start_parts = [part for part in start_rest.split(sep) if part]
    path_parts = [part for part in path_rest.split(sep) if part]

    # Count the leading components shared by start and path, ignoring case.
    shared = 0
    for start_part, path_part in zip(start_parts, path_parts):
        if normcase(start_part) != normcase(path_part):
            break
        shared += 1

    # Climb out of what is left of start, then descend into what is left
    # of path.
    parent = b'..' if isinstance(path, bytes) else '..'
    rel_parts = [parent] * (len(start_parts) - shared) + path_parts[shared:]
    if rel_parts:
        return join(*rel_parts)
    return _get_dot(path)
637 638 639


# determine if two files are in fact the same file
640
try:
    # GetFinalPathNameByHandle is available starting with Windows 6.0.
    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
    if sys.getwindowsversion()[:2] < (6, 0):
        raise ImportError
    from nt import _getfinalpathname
except (AttributeError, ImportError):
    # On Windows XP and earlier, two files are the same if their absolute
    # pathnames are the same.
    # Non-Windows operating systems fake this method with an XP
    # approximation.
    def _getfinalpathname(f):
        """Approximate the final path name by the case-normalized absolute path."""
        absolute = abspath(f)
        return normcase(absolute)
654

655 656
def samefile(f1, f2):
    """Test whether two pathnames reference the same actual file"""
    final1 = _getfinalpathname(f1)
    final2 = _getfinalpathname(f2)
    return final1 == final2


try:
    # Prefer the helper provided by the ``nt`` module when it can be imported.
    from nt import _getfileinformation
except ImportError:
    # On other operating systems, just return the fd and see that
    # it compares equal in sameopenfile.
    def _getfileinformation(fd):
        """Fallback used when ``nt`` is unavailable: return *fd* unchanged."""
        return fd
667 668 669

def sameopenfile(f1, f2):
    """Test whether two file objects reference the same file"""
    info1 = _getfileinformation(f1)
    info2 = _getfileinformation(f2)
    return info1 == info2
671 672 673 674 675 676 677


try:
    # The genericpath.isdir implementation uses os.stat and checks the mode
    # attribute to tell whether or not the path is a directory.
    # This is overkill on Windows - just pass the path to GetFileAttributes
    # and check the attribute from there.
    from nt import _isdir as isdir
except ImportError:
    # Use genericpath.isdir as imported above (via the star import from
    # genericpath at the top of this module).
    pass