r"""UUID objects (universally unique identifiers) according to RFC 4122.

This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.

If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
the computer's network address.  uuid4() creates a random UUID.

Typical usage:

    >>> import uuid

    # make a UUID based on the host ID and current time
    >>> uuid.uuid1()    # doctest: +SKIP
    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')

    # make a UUID using an MD5 hash of a namespace UUID and a name
    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')

    # make a random UUID
    >>> uuid.uuid4()    # doctest: +SKIP
    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')

    # make a UUID using a SHA-1 hash of a namespace UUID and a name
    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')

    # make a UUID from a string of hex digits (braces and hyphens ignored)
    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')

    # convert a UUID to a string of hex digits in standard form
    >>> str(x)
    '00010203-0405-0607-0809-0a0b0c0d0e0f'

    # get the raw 16 bytes of the UUID
    >>> x.bytes
    b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'

    # make a UUID from a 16-byte string
    >>> uuid.UUID(bytes=x.bytes)
    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
"""

__author__ = 'Ka-Ping Yee <ping@zesty.ca>'

# Human-readable variant names, returned by UUID.variant.
RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
    'reserved for NCS compatibility', 'specified in RFC 4122',
    'reserved for Microsoft compatibility', 'reserved for future definition']

# UUID.__init__ takes parameters named 'int' and 'bytes', which shadow the
# built-ins inside that method, so keep aliases to the real types.
int_ = int      # The built-in int type
bytes_ = bytes  # The built-in bytes type

# Characters allowed in the hexadecimal string form once braces, hyphens and
# the URN prefix have been stripped.
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')


class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a string
    of 16 bytes (with all the integer fields in big-endian order) as an
    argument named 'bytes', or a string of 16 bytes (with the first three
    fields in little-endian order) as an argument named 'bytes_le', or a
    single 128-bit integer as an argument named 'int'.

    UUIDs have these read-only attributes:

        bytes       the UUID as a 16-byte string (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte string (with time_low, time_mid,
                    and time_hi_version in little-endian byte order)

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122)
    """

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                 int=None, version=None):
        r"""Create a UUID from either a string of 32 hexadecimal digits,
        a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
        in little-endian order as the 'bytes_le' argument, a tuple of six
        integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
        8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
        the 'fields' argument, or a single 128-bit integer as the 'int'
        argument.  When a string of hex digits is given, curly braces,
        hyphens, and a URN prefix are all optional.  For example, these
        expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes=b'\x12\x34\x56\x78'*4)
        UUID(bytes_le=b'\x78\x56\x34\x12\x34\x12\x78\x56' +
                      b'\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.

        Raises TypeError when the number of supplied forms is not exactly
        one, and ValueError when the supplied value is malformed or out of
        range.
        """

        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
        if hex is not None:
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            # int() alone would also accept signs, a '0x' prefix, padding
            # whitespace and digit separators, so check the digits explicitly.
            if len(hex) != 32 or not _HEX_DIGITS.issuperset(hex):
                raise ValueError('badly formed hexadecimal UUID string')
            int = int_(hex, 16)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Byte-swap the first three fields to get the big-endian layout,
            # then fall through to the 'bytes' handling below.
            bytes = (bytes_(reversed(bytes_le[0:4])) +
                     bytes_(reversed(bytes_le[4:6])) +
                     bytes_(reversed(bytes_le[6:8])) +
                     bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            assert isinstance(bytes, bytes_), repr(bytes)
            int = int_.from_bytes(bytes, 'big')
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            if not 0 <= time_low < 1<<32:
                raise ValueError('field 1 out of range (need a 32-bit value)')
            if not 0 <= time_mid < 1<<16:
                raise ValueError('field 2 out of range (need a 16-bit value)')
            if not 0 <= time_hi_version < 1<<16:
                raise ValueError('field 3 out of range (need a 16-bit value)')
            if not 0 <= clock_seq_hi_variant < 1<<8:
                raise ValueError('field 4 out of range (need an 8-bit value)')
            if not 0 <= clock_seq_low < 1<<8:
                raise ValueError('field 5 out of range (need an 8-bit value)')
            if not 0 <= node < 1<<48:
                raise ValueError('field 6 out of range (need a 48-bit value)')
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
        if int is not None:
            if not 0 <= int < 1<<128:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
            # Set the version number.
            int &= ~(0xf000 << 64)
            int |= version << 76
        # Write through __dict__ because __setattr__ refuses all assignments.
        self.__dict__['int'] = int

    def __eq__(self, other):
        if isinstance(other, UUID):
            return self.int == other.int
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, UUID):
            return self.int != other.int
        return NotImplemented

    # Q. What's the value of being able to sort UUIDs?
    # A. Use them as keys in a B-Tree or similar mapping.

    def __lt__(self, other):
        if isinstance(other, UUID):
            return self.int < other.int
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, UUID):
            return self.int > other.int
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, UUID):
            return self.int <= other.int
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, UUID):
            return self.int >= other.int
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return 'UUID(%r)' % str(self)

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        hex = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])

    @property
    def bytes(self):
        """The UUID as 16 bytes, most significant byte first."""
        return self.int.to_bytes(16, 'big')

    @property
    def bytes_le(self):
        """The UUID as 16 bytes with the first three fields byte-swapped."""
        bytes = self.bytes
        return (bytes_(reversed(bytes[0:4])) +
                bytes_(reversed(bytes[4:6])) +
                bytes_(reversed(bytes[6:8])) +
                bytes[8:])

    @property
    def fields(self):
        """The six integer fields of the UUID as a tuple."""
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    @property
    def time_low(self):
        return self.int >> 96

    @property
    def time_mid(self):
        return (self.int >> 80) & 0xffff

    @property
    def time_hi_version(self):
        return (self.int >> 64) & 0xffff

    @property
    def clock_seq_hi_variant(self):
        return (self.int >> 56) & 0xff

    @property
    def clock_seq_low(self):
        return (self.int >> 48) & 0xff

    @property
    def time(self):
        """The 60-bit timestamp, with the version bits masked off."""
        return (((self.time_hi_version & 0x0fff) << 48) |
                (self.time_mid << 32) | self.time_low)

    @property
    def clock_seq(self):
        """The 14-bit sequence number, with the variant bits masked off."""
        return (((self.clock_seq_hi_variant & 0x3f) << 8) |
                self.clock_seq_low)

    @property
    def node(self):
        return self.int & 0xffffffffffff

    @property
    def hex(self):
        return '%032x' % self.int

    @property
    def urn(self):
        return 'urn:uuid:' + str(self)

    @property
    def variant(self):
        """One of RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE.

        Decided by the leading bits of clock_seq_hi_variant: 0xx, 10x, 110
        and 111 respectively.
        """
        if not self.int & (0x8000 << 48):
            return RESERVED_NCS
        elif not self.int & (0x4000 << 48):
            return RFC_4122
        elif not self.int & (0x2000 << 48):
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

    @property
    def version(self):
        """The version number, or None when the variant is not RFC_4122."""
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
            return int((self.int >> 76) & 0xf)
        return None

def _find_mac(command, args, hw_identifiers, get_index):
    """Run a system command and pull a hardware address out of its output.

    command         name of the program to run (for example 'ifconfig')
    args            argument string appended to the command line
    hw_identifiers  lower-cased words that mark a line containing an address
    get_index       callable mapping the position of the matched word to the
                    position of the word holding the address

    The program is looked up on PATH first and then in /sbin and /usr/sbin.
    Returns the address as an integer, or None when the program is missing,
    cannot be run, or prints nothing usable.
    """
    import os
    import shutil

    executable = shutil.which(command)
    if executable is None:
        # Administrative tools are often outside an ordinary user's PATH.
        sbin_path = os.pathsep.join(('/sbin', '/usr/sbin'))
        executable = shutil.which(command, path=sbin_path)
        if executable is None:
            return None

    try:
        # LC_ALL to get English output, 2>/dev/null to
        # prevent output on stderr
        cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
        with os.popen(cmd) as pipe:
            for line in pipe:
                words = line.lower().split()
                for i, word in enumerate(words):
                    if word not in hw_identifiers:
                        continue
                    try:
                        return int(
                            words[get_index(i)].replace(':', ''), 16)
                    except (ValueError, IndexError):
                        # The neighbouring token is missing or is not a
                        # colon-separated hex address; keep scanning.
                        continue
    except IOError:
        pass
    return None

def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig.

    Tries ifconfig with several argument sets, then arp keyed on this host's
    IP address, then lanscan.  Returns the address as an integer, or None
    when none of the commands yields one.
    """

    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    for args in ('', '-a', '-av'):
        mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
        if mac:
            return mac

    import socket
    try:
        ip_addr = socket.gethostbyname(socket.gethostname())
    except socket.error:
        # The host name does not resolve; skip arp but still try lanscan.
        ip_addr = None

    if ip_addr is not None:
        # Try getting the MAC addr from arp based on our IP address (Solaris).
        mac = _find_mac('arp', '-an', [ip_addr], lambda i: -1)
        if mac:
            return mac

    # This might work on HP-UX.
    mac = _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
    if mac:
        return mac

    return None

def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe.

    Returns the first six-octet address printed by 'ipconfig /all' as an
    integer, or None when no candidate directory yields one.
    """
    import os, re
    dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        dirs.insert(0, buffer.value.decode('mbcs'))
    except Exception:
        # Best effort only: fall back to the hard-coded directories.
        pass
    # Anchored at the end so that longer dash-separated values (more than
    # six octets) are not mistaken for a 48-bit address.
    mac_pattern = re.compile(r'([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]$')
    for dir in dirs:
        try:
            pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
        except IOError:
            continue
        # Only close a pipe that was actually opened.
        with pipe:
            for line in pipe:
                value = line.split(':')[-1].strip().lower()
                if mac_pattern.match(value):
                    return int(value.replace('-', ''), 16)
    return None

def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details.

    Returns the address of the first adapter that answers as an integer,
    or None when enumeration fails or no adapter answers.
    """
    import win32wnet, netbios
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return None
    adapters._unpack()
    for i in range(adapters.length):
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[i])
        if win32wnet.Netbios(ncb) != 0:
            continue
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[i])
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()
        # NOTE(review): adapter_address is assumed to be a bytes (or text)
        # buffer whose first six items are the address octets - confirm
        # against the pywin32 version in use.
        raw = status.adapter_address[:6]
        if isinstance(raw, str):
            raw = raw.encode('latin-1')
        if len(raw) != 6:
            continue
        return int.from_bytes(raw, 'big')
    return None

# Thanks to Thomas Heller for ctypes and for his help with its use here.

# If ctypes is available, use it to find system routines for UUID generation.
# XXX This makes the module non-thread-safe!
# Each of the three names below stays None when the matching routine cannot
# be located; callers test them before use.
_uuid_generate_random = _uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    for libname in ['uuid', 'c']:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except Exception:
            continue
        if hasattr(lib, 'uuid_generate_random'):
            _uuid_generate_random = lib.uuid_generate_random
        if hasattr(lib, 'uuid_generate_time'):
            _uuid_generate_time = lib.uuid_generate_time

    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
    # in issue #8621 the function generates the same sequence of values
    # in the parent process and all children created using fork (unless
    # those children use exec as well).
    #
    # Assume that the uuid_generate functions are broken from 10.5 onward,
    # the test can be adjusted when a later version is fixed.
    import sys
    if sys.platform == 'darwin':
        import os
        if int(os.uname()[2].split('.')[0]) >= 9:
            _uuid_generate_random = _uuid_generate_time = None

    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except Exception:
        lib = None
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except Exception:
    # ctypes is unavailable or probing failed; whatever was found so far is
    # kept and the pure-Python fallbacks cover the rest.
    pass

def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes.

    Asks the system's uuid_generate_time routine for a UUID and returns its
    node field, or None when that routine was not found at import time.
    """
    if _uuid_generate_time is None:
        return None
    _buffer = ctypes.create_string_buffer(16)
    _uuid_generate_time(_buffer)
    return UUID(bytes=bytes_(_buffer.raw)).node

def _windll_getnode():
    """Get the hardware address on Windows using ctypes.

    Returns the node field of a UUID produced by the RPC runtime routine
    found at import time, or None when the routine is unavailable or
    reports a non-zero status.
    """
    if _UuidCreate is None:
        return None
    _buffer = ctypes.create_string_buffer(16)
    if _UuidCreate(_buffer) == 0:
        return UUID(bytes=bytes_(_buffer.raw)).node
    return None

def _random_getnode():
    """Get a random node ID, with eighth bit set as suggested by RFC 4122.

    The result is a 48-bit integer whose bit 40 (the lowest bit of the
    first octet) is always 1.
    """
    import random
    return random.randrange(0, 1<<48) | 0x010000000000

_node = None

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.

    The result is cached in the module-level _node and reused afterwards.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode]

    for getter in getters + [_random_getnode]:
        try:
            _node = getter()
        except Exception:
            # A failing probe just means "try the next one"; interpreter
            # exit and keyboard interrupts are deliberately not swallowed.
            continue
        if _node is not None:
            return _node

# Timestamp (in 100-ns intervals) used by the most recent pure-Python uuid1()
# call; lets successive calls produce strictly increasing timestamps.
_last_timestamp = None

def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_time(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    global _last_timestamp
    import time
    nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    # Floor division keeps the arithmetic in integers; true division would
    # go through a float that cannot represent values of this size exactly.
    timestamp = nanoseconds // 100 + 0x01b21dd213814000
    if _last_timestamp is not None and timestamp <= _last_timestamp:
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
    if clock_seq is None:
        import random
        clock_seq = random.randrange(1<<14) # instead of stable storage
    time_low = timestamp & 0xffffffff
    time_mid = (timestamp >> 32) & 0xffff
    time_hi_version = (timestamp >> 48) & 0x0fff
    clock_seq_low = clock_seq & 0xff
    clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)

def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name.

    'namespace' is a UUID; 'name' is a str (encoded as UTF-8 before
    hashing) or a bytes object (hashed as given).
    """
    from hashlib import md5
    if isinstance(name, str):
        name = bytes(name, "utf-8")
    hash = md5(namespace.bytes + name).digest()
    return UUID(bytes=hash[:16], version=3)

def uuid4():
    """Generate a random UUID."""

    # When the system provides a version-4 UUID generator, use it.
    if _uuid_generate_random:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_random(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    # Otherwise, get randomness from urandom or the 'random' module.
    try:
        import os
        random_bytes = os.urandom(16)
    except Exception:
        # No usable OS randomness source; fall back to the 'random' module.
        import random
        random_bytes = bytes_(random.randrange(256) for i in range(16))
    return UUID(bytes=random_bytes, version=4)

def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

    'namespace' is a UUID; 'name' is a str (encoded as UTF-8 before
    hashing) or a bytes object (hashed as given).  Only the first 16 bytes
    of the digest are used.
    """
    from hashlib import sha1
    if isinstance(name, str):
        name = bytes(name, "utf-8")
    hash = sha1(namespace.bytes + name).digest()
    return UUID(bytes=hash[:16], version=5)

# Predefined namespace UUIDs, intended as the 'namespace' argument of
# uuid3() and uuid5().

NAMESPACE_DNS = UUID(hex='6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID(hex='6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID(hex='6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID(hex='6ba7b814-9dad-11d1-80b4-00c04fd430c8')