# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
"""Common pathname manipulations, WindowsNT/95 version.
3 4 5

Instead of importing this module directly, import os and refer to this
module as os.path.
6
"""
7 8

import os
9
import sys
10
import stat
11
import genericpath
12
from genericpath import *
13

14 15
# Public names exported by ``from ntpath import *``.
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix", "getsize", "getmtime",
    "getatime", "getctime", "islink", "exists", "lexists", "isdir",
    "isfile", "ismount", "expanduser", "expandvars", "normpath",
    "abspath", "splitunc", "curdir", "pardir", "sep", "pathsep",
    "defpath", "altsep", "extsep", "devnull", "realpath",
    "supports_unicode_filenames", "relpath", "samefile", "sameopenfile",
    "samestat",
]
21

22
# strings representing various path-related bits and pieces
23
# These are primarily for export; internally, they are hardcoded.
24 25 26 27 28
curdir = '.'
pardir = '..'
extsep = '.'
sep = '\\'
pathsep = ';'
altsep = '/'
# Builds that expose a builtin 'ce' module (presumably Windows CE) get a
# different default search path.
defpath = '\\Windows' if 'ce' in sys.builtin_module_names else '.;C:\\bin'
devnull = 'nul'
34

35 36 37 38 39 40
def _get_empty(path):
    """Return an empty string of the same kind (bytes or str) as *path*."""
    return b'' if isinstance(path, bytes) else ''

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
def _get_sep(path):
    """Return the backslash separator as bytes or str, matching *path*."""
    return b'\\' if isinstance(path, bytes) else '\\'

def _get_altsep(path):
    """Return the forward-slash separator as bytes or str, matching *path*."""
    return b'/' if isinstance(path, bytes) else '/'

def _get_bothseps(path):
    """Return both separators (backslash, then slash) in the type of *path*."""
    return b'\\/' if isinstance(path, bytes) else '\\/'

def _get_dot(path):
    """Return a single dot as bytes or str, matching *path*."""
    return b'.' if isinstance(path, bytes) else '.'

def _get_colon(path):
    """Return a colon as bytes or str, matching *path*."""
    return b':' if isinstance(path, bytes) else ':'

71 72 73 74 75 76
def _get_special(path):
    """Return the two special path prefixes in the type of *path*.

    The result is a 2-tuple, suitable for passing to startswith().
    """
    return ((b'\\\\.\\', b'\\\\?\\') if isinstance(path, bytes)
            else ('\\\\.\\', '\\\\?\\'))

77 78
# Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done
79
# (this is done by normpath).
80

81
def normcase(s):
    """Normalize case of pathname.

    Makes all characters lowercase and all slashes into backslashes.
    Raises TypeError when the argument is neither str nor bytes."""
    if isinstance(s, (bytes, str)):
        backslashed = s.replace(_get_altsep(s), _get_sep(s))
        return backslashed.lower()
    raise TypeError("normcase() argument must be str or bytes, "
                    "not '{}'".format(s.__class__.__name__))
89

90

91
# Return whether a path is absolute.
92 93 94
# Trivial in Posix, harder on Windows.
# For Windows it is absolute if it starts with a slash or backslash (current
# volume), or if a pathname after the volume-letter-and-colon or UNC-resource
95
# starts with a slash or backslash.
96 97

def isabs(s):
    """Test whether a path is absolute"""
    # Only the part after the drive / UNC share decides absoluteness.
    _drive, rest = splitdrive(s)
    if not rest:
        return False
    return rest[:1] in _get_bothseps(rest)
101 102


103
# Join two (or more) paths.
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
def join(path, *paths):
    """Join two or more pathname components.

    A component that starts with a separator replaces the path collected
    so far (and the drive too, when it names one or none is set yet).  A
    component on a different drive discards everything before it.  All
    other components are appended, with a backslash inserted when needed.
    """
    sep = _get_sep(path)
    seps = _get_bothseps(path)
    colon = _get_colon(path)
    drive, tail = splitdrive(path)
    for part in paths:
        part_drive, part_tail = splitdrive(part)
        if part_tail and part_tail[0] in seps:
            # This component is absolute: it replaces the path so far.
            if part_drive or not drive:
                drive = part_drive
            tail = part_tail
            continue
        if part_drive and part_drive != drive:
            if part_drive.lower() != drive.lower():
                # Different drives => ignore everything collected so far.
                drive, tail = part_drive, part_tail
                continue
            # Same drive spelled in a different case: keep the newer form.
            drive = part_drive
        # This component is relative to what has been collected so far.
        if tail and tail[-1] not in seps:
            tail += sep
        tail += part_tail
    # A UNC share followed by a relative path needs a separator in between.
    unc_needs_sep = (tail and tail[0] not in seps and
                     drive and drive[-1:] != colon)
    if unc_needs_sep:
        return drive + sep + tail
    return drive + tail
134 135 136


# Split a path into a drive specification (a drive letter followed by a
# colon) and the path specification.
# It is always true that drivespec + pathspec == p
def splitdrive(p):
    """Split a pathname into drive/UNC sharepoint and relative path specifiers.
    Returns a 2-tuple (drive_or_unc, path); either part may be empty.

    For any p, with
        result = splitdrive(p)
    it always holds that:
        result[0] + result[1] == p

    A drive letter yields everything up to and including the colon,
    e.g. splitdrive("c:/dir") returns ("c:", "/dir").

    A UNC path yields the host name and share, up to but not including
    the fourth directory separator character,
    e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir").

    Paths cannot contain both a drive letter and a UNC path.

    """
    nothing = _get_empty(p)
    if len(p) < 2:
        return nothing, p
    sep = _get_sep(p)
    # Work on a backslash-only copy; slices are always taken from p itself.
    norm = p.replace(_get_altsep(p), sep)
    if norm.startswith(sep * 2) and norm[2:3] != sep:
        # is a UNC path:
        # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
        # \\machine\mountpoint\directory\etc\...
        #           directory ^^^^^^^^^^^^^^^
        host_end = norm.find(sep, 2)
        if host_end == -1:
            return nothing, p
        share_end = norm.find(sep, host_end + 1)
        # a UNC path can't have two slashes in a row
        # (after the initial two)
        if share_end == host_end + 1:
            return nothing, p
        if share_end == -1:
            share_end = len(p)
        return p[:share_end], p[share_end:]
    if norm[1:2] == _get_colon(p):
        return p[:2], p[2:]
    return nothing, p
181 182 183 184


# Parse UNC paths
def splitunc(p):
    """Deprecated since Python 3.1.  Please use splitdrive() instead;
    it now handles UNC paths.

    Split a pathname into UNC mount point and relative path specifiers.

    Return a 2-tuple (unc, rest); either part may be empty.
    If unc is not empty, it has the form '//host/mount' (or similar
    using backslashes).  unc+rest is always the input path.
    Paths containing drive letters never have an UNC part.
    """
    import warnings
    warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead",
                  DeprecationWarning, stacklevel=2)
    unc, rest = splitdrive(p)
    # A two-character "drive" is a drive letter plus colon, not a UNC share.
    is_drive_letter = len(unc) == 2
    return (p[:0], p) if is_drive_letter else (unc, rest)
203 204 205


# Split a path into head (everything up to the last '/') and tail (the
# rest).  After the trailing '/' is stripped, the invariant
# join(head, tail) == p holds.
# The resulting head won't end in '/' unless it is the root.

def split(p):
    """Split a pathname.

    Return tuple (head, tail) where tail is everything after the final slash.
    Either part may be empty."""
    seps = _get_bothseps(p)
    drive, rest = splitdrive(p)
    # Index just past the last separator of either kind (0 if there is none).
    cut = max(rest.rfind(seps[:1]), rest.rfind(seps[1:])) + 1
    head, tail = rest[:cut], rest[cut:]  # now tail has no slashes
    # Drop trailing slashes from head, unless head consists only of slashes.
    head = head.rstrip(seps) or head
    return drive + head, tail
229 230 231


# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    # Delegate to the generic implementation, supplying this module's
    # separators and extension marker in the same type as p.
    primary, alternate, dot = _get_sep(p), _get_altsep(p), _get_dot(p)
    return genericpath._splitext(p, primary, alternate, dot)
splitext.__doc__ = genericpath._splitext.__doc__
240 241 242 243 244


# Return the tail (basename) part of a path.

def basename(p):
    """Returns the final component of a pathname"""
    _head, tail = split(p)
    return tail
247 248 249 250 251


# Return the head (dirname) part of a path.

def dirname(p):
    """Returns the directory component of a pathname"""
    head, _tail = split(p)
    return head
254 255

# Is a path a symbolic link?
256
# This will always return false on systems where os.lstat doesn't exist.
257 258

def islink(path):
    """Test whether a path is a symbolic link.

    This will always return false for Windows prior to 6.0.
    Returns False, rather than raising, when the path cannot be examined.
    """
    try:
        st = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        # OSError: the path does not exist or cannot be accessed.
        # ValueError: the path cannot be passed to the OS at all
        #             (e.g. it contains an embedded null character).
        # AttributeError: os.lstat is not available.
        return False
    return stat.S_ISLNK(st.st_mode)

# Being true for dangling symbolic links is also useful.

def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links"""
    try:
        os.lstat(path)
    except (OSError, ValueError):
        # OSError: nothing is there (or it cannot be accessed).
        # ValueError: the path cannot be passed to the OS at all
        #             (e.g. it contains an embedded null character).
        return False
    return True
277

278 279 280 281 282 283 284 285 286 287 288 289 290 291
# Is a path a mount point?
# Any drive letter root (eg c:\)
# Any share UNC (eg \\server\share)
# Any volume mounted on a filesystem folder
#
# No one method detects all three situations. Historically we've lexically
# detected drive letter roots and share UNCs. The canonical approach to
# detecting mounted volumes (querying the reparse tag) fails for the most
# common case: drive letter roots. The alternative which uses GetVolumePathName
# fails if the drive letter is the result of a SUBST.
# _getvolumepathname is taken from the builtin 'nt' module.  When that
# import fails the name is bound to None instead, which makes ismount()
# skip its mounted-volume check and rely on the lexical tests alone.
try:
    from nt import _getvolumepathname
except ImportError:
    _getvolumepathname = None
292
def ismount(path):
    """Test whether a path is a mount point (a drive root, the root of a
    share, or a mounted volume)"""
    seps = _get_bothseps(path)
    path = abspath(path)
    drive, tail = splitdrive(path)
    if drive and drive[0] in seps:
        # UNC share: a mount point only when nothing but the root follows.
        return not tail or tail in seps
    if tail in seps:
        # Drive letter followed by nothing but the root.
        return True

    if not _getvolumepathname:
        return False
    # Mounted volume: the path is its own volume path (trailing seps aside).
    return path.rstrip(seps) == _getvolumepathname(path).rstrip(seps)
307 308 309 310 311 312 313 314 315 316 317 318


# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructs.

    If user or $HOME is unknown, do nothing.

    The home directory is taken from the first of these environment
    variables that is set: HOME, USERPROFILE, HOMEDRIVE + HOMEPATH.
    For ~user, the last component of that directory is replaced by user.
    The result has the same type (bytes or str) as *path*."""
    if isinstance(path, bytes):
        tilde = b'~'
    else:
        tilde = '~'
    if not path.startswith(tilde):
        return path

    # Find the end of the "~" or "~user" prefix (first separator, if any).
    seps = _get_bothseps(path)
    i, n = 1, len(path)
    while i < n and path[i] not in seps:
        i += 1

    if 'HOME' in os.environ:
        userhome = os.environ['HOME']
    elif 'USERPROFILE' in os.environ:
        userhome = os.environ['USERPROFILE']
    elif 'HOMEPATH' not in os.environ:
        return path
    else:
        drive = os.environ.get('HOMEDRIVE', '')
        userhome = join(drive, os.environ['HOMEPATH'])

    if isinstance(path, bytes):
        # os.fsencode applies the filesystem encoding together with its
        # error handler, matching what expandvars() does in this module.
        userhome = os.fsencode(userhome)

    if i != 1: #~user
        userhome = join(dirname(userhome), path[1:i])

    return userhome + path[i:]
352 353 354 355


# Expand paths containing shell variable substitutions.
# The following rules apply:
356
#       - no expansion within single quotes
357 358
#       - '$$' is translated into '$'
#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
359
#       - ${varname} is accepted.
360 361 362
#       - $varname is accepted.
#       - %varname% is accepted.
#       - varnames can be made out of letters, digits and the characters '_-'
#         (though this is not verified in the ${varname} and %varname% cases)
364 365 366
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

Tim Peters's avatar
Tim Peters committed
367
def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Unknown variables are left unchanged.

    Nothing is expanded inside single quotes; '$$' becomes '$' and '%%'
    becomes '%'.  Unterminated quotes, '%var' and '${var' constructs are
    copied to the result unchanged.  The result has the same type (bytes
    or str) as *path*."""
    if isinstance(path, bytes):
        if b'$' not in path and b'%' not in path:
            return path
        import string
        varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
        quote = b'\''
        percent = b'%'
        brace = b'{'
        rbrace = b'}'
        dollar = b'$'
        # os.environb does not exist on every platform; None selects the
        # decode / look up in os.environ / encode fallback below.
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path and '%' not in path:
            return path
        import string
        varchars = string.ascii_letters + string.digits + '_-'
        quote = '\''
        percent = '%'
        brace = '{'
        rbrace = '}'
        dollar = '$'
        environ = os.environ

    def lookup(name):
        # Return the value of variable *name*; KeyError if it is not set.
        if environ is None:
            return os.fsencode(os.environ[os.fsdecode(name)])
        return environ[name]

    res = path[:0]
    index = 0
    pathlen = len(path)
    # Whenever a delimiter is found, *path* is cut down to the text after
    # it and *index* is re-based onto that shorter string.
    while index < pathlen:
        c = path[index:index+1]
        if c == quote:   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(c)
                res += c + path[:index + 1]
            except ValueError:
                # Unterminated quote: keep the quote itself as well as
                # the text after it, so the input comes back unchanged.
                res += c + path
                index = pathlen - 1
        elif c == percent:  # variable or '%'
            if path[index + 1:index + 2] == percent:
                res += c
                index += 1
            else:
                path = path[index+1:]
                pathlen = len(path)
                try:
                    index = path.index(percent)
                except ValueError:
                    # No closing '%': copy the remainder verbatim.
                    res += percent + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    try:
                        value = lookup(var)
                    except KeyError:
                        value = percent + var + percent
                    res += value
        elif c == dollar:  # variable or '$$'
            if path[index + 1:index + 2] == dollar:
                res += c
                index += 1
            elif path[index + 1:index + 2] == brace:
                path = path[index+2:]
                pathlen = len(path)
                try:
                    index = path.index(rbrace)
                except ValueError:
                    # No closing '}': copy the remainder verbatim.
                    res += dollar + brace + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    try:
                        value = lookup(var)
                    except KeyError:
                        value = dollar + brace + var + rbrace
                    res += value
            else:
                # $varname: collect the longest run of name characters.
                var = path[:0]
                index += 1
                c = path[index:index + 1]
                while c and c in varchars:
                    var += c
                    index += 1
                    c = path[index:index + 1]
                try:
                    value = lookup(var)
                except KeyError:
                    value = dollar + var
                res += value
                if c:
                    # Step back so the character that ended the name is
                    # handled by the next iteration of the main loop.
                    index -= 1
        else:
            res += c
        index += 1
    return res
480 481


482
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
483 484
# Previously, this function also truncated pathnames to 8+3 format,
# but as this module is called "ntpath", that's obviously wrong!
485 486

def normpath(path):
    """Normalize path, eliminating double slashes, etc.

    Forward slashes become backslashes, empty and '.' components are
    dropped, and 'name\\..' pairs are collapsed.  Paths that start with
    one of the special prefixes are returned unchanged.  An empty result
    is returned as '.'."""
    sep = _get_sep(path)
    dot = _get_dot(path)
    dotdot = dot * 2
    if path.startswith(_get_special(path)):
        # in the case of paths with these prefixes:
        # \\.\ -> device names
        # \\?\ -> literal paths
        # do not do any normalization, but return the path unchanged
        return path
    path = path.replace(_get_altsep(path), sep)
    prefix, path = splitdrive(path)

    # collapse initial backslashes
    if path.startswith(sep):
        prefix += sep
        path = path.lstrip(sep)

    # Single pass with a stack of kept components; this avoids deleting
    # from the middle of a list over and over.
    comps = []
    for comp in path.split(sep):
        if not comp or comp == dot:
            continue
        if comp == dotdot:
            if comps and comps[-1] != dotdot:
                # 'name\..' cancels out.
                comps.pop()
                continue
            if not comps and prefix.endswith(sep):
                # '..' directly under the root stays at the root.
                continue
        comps.append(comp)
    # If the path is now empty, substitute '.'
    if not prefix and not comps:
        comps.append(dot)
    return prefix + sep.join(comps)
Guido van Rossum's avatar
Guido van Rossum committed
524 525 526


# Return an absolute path.
527 528 529
try:
    from nt import _getfullpathname

except ImportError: # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute version of a path."""
        if isabs(path):
            return normpath(path)
        # Relative path: anchor it at the current directory, using the
        # bytes or str flavour that matches the argument.
        cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
        return normpath(join(cwd, path))

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute version of a path."""
        if not path:
            # Empty path must return current working directory.
            path = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
        else:
            try:
                path = _getfullpathname(path)
            except OSError:
                pass # Bad path - return unchanged.
        return normpath(path)
555 556 557

# realpath is a no-op on systems without islink support: here it is bound
# to the very same function object as abspath.
realpath = abspath
558
# Win9x family and earlier have no Unicode filename support.
559 560
# True only when sys.getwindowsversion exists and element 3 of its result
# is at least 2; False everywhere else.
try:
    supports_unicode_filenames = sys.getwindowsversion()[3] >= 2
except AttributeError:
    supports_unicode_filenames = False
561 562 563

def relpath(path, start=curdir):
    """Return a relative version of a path

    The result leads from *start* (the current directory by default) to
    *path*.  Raises ValueError when *path* is empty or when the two lie
    on different drives / UNC shares."""
    sep = _get_sep(path)

    # The default start is a str; switch it to the type of path.  Compare
    # by value (guarded by type, so bytes are never compared with str)
    # instead of by identity, so an explicitly passed '.' is treated the
    # same as the default.
    if isinstance(start, str) and start == curdir:
        start = _get_dot(path)

    if not path:
        raise ValueError("no path specified")

    start_abs = abspath(normpath(start))
    path_abs = abspath(normpath(path))
    start_drive, start_rest = splitdrive(start_abs)
    path_drive, path_rest = splitdrive(path_abs)
    if normcase(start_drive) != normcase(path_drive):
        error = "path is on mount '{0}', start on mount '{1}'".format(
            path_drive, start_drive)
        raise ValueError(error)

    start_list = [x for x in start_rest.split(sep) if x]
    path_list = [x for x in path_rest.split(sep) if x]
    # Work out how much of the filepath is shared by start and path.
    i = 0
    for e1, e2 in zip(start_list, path_list):
        if normcase(e1) != normcase(e2):
            break
        i += 1

    # Climb out of the unshared part of start, then descend into path.
    pardir = _get_dot(path) * 2
    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    if not rel_list:
        return _get_dot(path)
    return join(*rel_list)
598 599 600


# determine if two files are in fact the same file
601
try:
    # GetFinalPathNameByHandle is available starting with Windows 6.0.
    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
    if sys.getwindowsversion()[:2] < (6, 0):
        raise ImportError
    from nt import _getfinalpathname
except (AttributeError, ImportError):
    # On Windows XP and earlier, two files are the same if their absolute
    # pathnames are the same.
    # Non-Windows operating systems fake this method with an XP
    # approximation.
    def _getfinalpathname(f):
        absolute = abspath(f)
        return normcase(absolute)
615

616 617 618 619 620 621

try:
    # The genericpath.isdir implementation uses os.stat and checks the mode
    # attribute to tell whether or not the path is a directory.
    # This is overkill on Windows - just pass the path to GetFileAttributes
    # and check the attribute from there.
622
    from nt import _isdir as isdir
623
except ImportError:
624 625
    # Use genericpath.isdir as imported above.
    pass