uuid.py 21 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
r"""UUID objects (universally unique identifiers) according to RFC 4122.

This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.

If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
the computer's network address.  uuid4() creates a random UUID.

Typical usage:

    >>> import uuid

    # make a UUID based on the host ID and current time
    >>> uuid.uuid1()
    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')

    # make a UUID using an MD5 hash of a namespace UUID and a name
    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')

    # make a random UUID
    >>> uuid.uuid4()
    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')

    # make a UUID using a SHA-1 hash of a namespace UUID and a name
    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')

    # make a UUID from a string of hex digits (braces and hyphens ignored)
    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')

    # convert a UUID to a string of hex digits in standard form
    >>> str(x)
    '00010203-0405-0607-0809-0a0b0c0d0e0f'

    # get the raw 16 bytes of the UUID
    >>> x.bytes
40
    b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
41 42 43 44 45 46 47 48 49 50 51 52

    # make a UUID from a 16-byte string
    >>> uuid.UUID(bytes=x.bytes)
    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
"""

__author__ = 'Ka-Ping Yee <ping@zesty.ca>'

RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
    'reserved for NCS compatibility', 'specified in RFC 4122',
    'reserved for Microsoft compatibility', 'reserved for future definition']

53 54
int_ = int      # The built-in int type
bytes_ = bytes  # The built-in bytes type
55

56 57 58 59 60
class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
61 62 63 64 65 66 67
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a string
    of 16 bytes (with all the integer fields in big-endian order) as an
    argument named 'bytes', or a string of 16 bytes (with the first three
    fields in little-endian order) as an argument named 'bytes_le', or a
    single 128-bit integer as an argument named 'int'.
68 69 70

    UUIDs have these read-only attributes:

71 72 73 74 75
        bytes       the UUID as a 16-byte string (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte string (with time_low, time_mid,
                    and time_hi_version in little-endian byte order)
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122)
    """

104 105
    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                       int=None, version=None):
106
        r"""Create a UUID from either a string of 32 hexadecimal digits,
107 108
        a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
        in little-endian order as the 'bytes_le' argument, a tuple of six
109 110 111 112 113 114 115 116 117 118 119
        integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
        8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
        the 'fields' argument, or a single 128-bit integer as the 'int'
        argument.  When a string of hex digits is given, curly braces,
        hyphens, and a URN prefix are all optional.  For example, these
        expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes='\x12\x34\x56\x78'*4)
120 121
        UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
                      '\x12\x34\x56\x78\x12\x34\x56\x78')
122 123 124
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

125 126 127 128
        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.
129 130
        """

131 132
        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
133 134 135 136 137
        if hex is not None:
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            if len(hex) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
138
            int = int_(hex, 16)
139 140 141
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
142 143 144
            bytes = (bytes_(reversed(bytes_le[0:4])) +
                     bytes_(reversed(bytes_le[4:6])) +
                     bytes_(reversed(bytes_le[6:8])) +
145
                     bytes_le[8:])
146 147 148
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
149 150
            assert isinstance(bytes, bytes_), repr(bytes)
            int = int_(('%02x'*16) % tuple(bytes), 16)
151 152 153 154 155
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
156
            if not 0 <= time_low < 1<<32:
157
                raise ValueError('field 1 out of range (need a 32-bit value)')
158
            if not 0 <= time_mid < 1<<16:
159
                raise ValueError('field 2 out of range (need a 16-bit value)')
160
            if not 0 <= time_hi_version < 1<<16:
161
                raise ValueError('field 3 out of range (need a 16-bit value)')
162
            if not 0 <= clock_seq_hi_variant < 1<<8:
163
                raise ValueError('field 4 out of range (need an 8-bit value)')
164
            if not 0 <= clock_seq_low < 1<<8:
165
                raise ValueError('field 5 out of range (need an 8-bit value)')
166
            if not 0 <= node < 1<<48:
167
                raise ValueError('field 6 out of range (need a 48-bit value)')
168 169 170
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
171
        if int is not None:
172
            if not 0 <= int < 1<<128:
173 174 175 176 177
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
178 179
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
180
            # Set the version number.
181 182
            int &= ~(0xf000 << 64)
            int |= version << 76
183 184
        self.__dict__['int'] = int

185
    def __eq__(self, other):
186
        if isinstance(other, UUID):
187 188 189 190 191 192 193 194
            return self.int == other.int
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, UUID):
            return self.int != other.int
        return NotImplemented

195 196
    # Q. What's the value of being able to sort UUIDs?
    # A. Use them as keys in a B-Tree or similar mapping.
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215

    def __lt__(self, other):
        if isinstance(other, UUID):
            return self.int < other.int
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, UUID):
            return self.int > other.int
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, UUID):
            return self.int <= other.int
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, UUID):
            return self.int >= other.int
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return 'UUID(%r)' % str(self)

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        hex = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])

235 236
    @property
    def bytes(self):
237
        bytes = bytearray()
238
        for shift in range(0, 128, 8):
239
            bytes.insert(0, (self.int >> shift) & 0xff)
240 241
        return bytes

242 243
    @property
    def bytes_le(self):
244
        bytes = self.bytes
245 246 247 248
        return (bytes_(reversed(bytes[0:4])) +
                bytes_(reversed(bytes[4:6])) +
                bytes_(reversed(bytes[6:8])) +
                bytes[8:])
249

250 251
    @property
    def fields(self):
252 253 254
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

255 256
    @property
    def time_low(self):
257
        return self.int >> 96
258

259 260
    @property
    def time_mid(self):
261
        return (self.int >> 80) & 0xffff
262

263 264
    @property
    def time_hi_version(self):
265
        return (self.int >> 64) & 0xffff
266

267 268
    @property
    def clock_seq_hi_variant(self):
269
        return (self.int >> 56) & 0xff
270

271 272
    @property
    def clock_seq_low(self):
273
        return (self.int >> 48) & 0xff
274

275 276
    @property
    def time(self):
277 278
        return (((self.time_hi_version & 0x0fff) << 48) |
                (self.time_mid << 32) | self.time_low)
279

280 281
    @property
    def clock_seq(self):
282
        return (((self.clock_seq_hi_variant & 0x3f) << 8) |
283 284
                self.clock_seq_low)

285 286
    @property
    def node(self):
287 288
        return self.int & 0xffffffffffff

289 290
    @property
    def hex(self):
291 292
        return '%032x' % self.int

293 294
    @property
    def urn(self):
295 296
        return 'urn:uuid:' + str(self)

297 298
    @property
    def variant(self):
299
        if not self.int & (0x8000 << 48):
300
            return RESERVED_NCS
301
        elif not self.int & (0x4000 << 48):
302
            return RFC_4122
303
        elif not self.int & (0x2000 << 48):
304 305 306 307
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

308 309
    @property
    def version(self):
310 311
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
312
            return int((self.int >> 76) & 0xf)
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406

def _find_mac(command, args, hw_identifiers, get_index):
    import os
    for dir in ['', '/sbin/', '/usr/sbin']:
        executable = os.path.join(dir, command)
        if not os.path.exists(executable):
            continue

        try:
            # LC_ALL to get English output, 2>/dev/null to
            # prevent output on stderr
            cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
            pipe = os.popen(cmd)
        except IOError:
            continue

        for line in pipe:
            words = line.lower().split()
            for i in range(len(words)):
                if words[i] in hw_identifiers:
                    return int(words[get_index(i)].replace(':', ''), 16)
    return None

def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig."""

    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    for args in ('', '-a', '-av'):
        mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
        if mac:
            return mac

    import socket
    ip_addr = socket.gethostbyname(socket.gethostname())

    # Try getting the MAC addr from arp based on our IP address (Solaris).
    mac = _find_mac('arp', '-an', [ip_addr], lambda i: -1)
    if mac:
        return mac

    # This might work on HP-UX.
    mac = _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
    if mac:
        return mac

    return None

def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe."""
    import os, re
    dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        dirs.insert(0, buffer.value.decode('mbcs'))
    except:
        pass
    for dir in dirs:
        try:
            pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
        except IOError:
            continue
        for line in pipe:
            value = line.split(':')[-1].strip().lower()
            if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
                return int(value.replace('-', ''), 16)

def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details."""
    import win32wnet, netbios
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return
    adapters._unpack()
    for i in range(adapters.length):
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[i])
        if win32wnet.Netbios(ncb) != 0:
            continue
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[i])
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()
        bytes = map(ord, status.adapter_address)
407 408
        return ((bytes[0]<<40) + (bytes[1]<<32) + (bytes[2]<<24) +
                (bytes[3]<<16) + (bytes[4]<<8) + bytes[5])
409 410 411 412

# Thanks to Thomas Heller for ctypes and for his help with its use here.

# If ctypes is available, use it to find system routines for UUID generation.
Guido van Rossum's avatar
Guido van Rossum committed
413
# XXX This makes the module non-thread-safe!
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
_uuid_generate_random = _uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    for libname in ['uuid', 'c']:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except:
            continue
        if hasattr(lib, 'uuid_generate_random'):
            _uuid_generate_random = lib.uuid_generate_random
        if hasattr(lib, 'uuid_generate_time'):
            _uuid_generate_time = lib.uuid_generate_time

430
    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
431 432 433 434
    # in issue #8621 the function generates the same sequence of values
    # in the parent process and all children created using fork (unless
    # those children use exec as well).
    #
435
    # Assume that the uuid_generate functions are broken from 10.5 onward,
436 437 438 439
    # the test can be adjusted when a later version is fixed.
    import sys
    if sys.platform == 'darwin':
        import os
440
        if int(os.uname()[2].split('.')[0]) >= 9:
441 442
            _uuid_generate_random = _uuid_generate_time = None

443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except:
        lib = None
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except:
    pass

def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes."""
462
    _buffer = ctypes.create_string_buffer(16)
463
    _uuid_generate_time(_buffer)
Guido van Rossum's avatar
Guido van Rossum committed
464
    return UUID(bytes=bytes_(_buffer.raw)).node
465 466 467

def _windll_getnode():
    """Get the hardware address on Windows using ctypes."""
468
    _buffer = ctypes.create_string_buffer(16)
469
    if _UuidCreate(_buffer) == 0:
Guido van Rossum's avatar
Guido van Rossum committed
470
        return UUID(bytes=bytes_(_buffer.raw)).node
471 472 473 474

def _random_getnode():
    """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
    import random
475
    return random.randrange(0, 1<<48) | 0x010000000000
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505

_node = None

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode]

    for getter in getters + [_random_getnode]:
        try:
            _node = getter()
        except:
            continue
        if _node is not None:
            return _node

506 507
_last_timestamp = None

508 509 510 511 512 513 514 515 516
def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
517
        _buffer = ctypes.create_string_buffer(16)
518
        _uuid_generate_time(_buffer)
Guido van Rossum's avatar
Guido van Rossum committed
519
        return UUID(bytes=bytes_(_buffer.raw))
520

521
    global _last_timestamp
522 523 524 525
    import time
    nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
526
    timestamp = int(nanoseconds/100) + 0x01b21dd213814000
527
    if _last_timestamp is not None and timestamp <= _last_timestamp:
528 529
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
530 531
    if clock_seq is None:
        import random
532 533 534 535 536 537
        clock_seq = random.randrange(1<<14) # instead of stable storage
    time_low = timestamp & 0xffffffff
    time_mid = (timestamp >> 32) & 0xffff
    time_hi_version = (timestamp >> 48) & 0x0fff
    clock_seq_low = clock_seq & 0xff
    clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
538 539 540 541 542 543 544
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)

def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name."""
545
    from hashlib import md5
546
    hash = md5(namespace.bytes + bytes(name, "utf-8")).digest()
547
    return UUID(bytes=hash[:16], version=3)
548 549 550 551 552 553

def uuid4():
    """Generate a random UUID."""

    # When the system provides a version-4 UUID generator, use it.
    if _uuid_generate_random:
554
        _buffer = ctypes.create_string_buffer(16)
555
        _uuid_generate_random(_buffer)
Guido van Rossum's avatar
Guido van Rossum committed
556
        return UUID(bytes=bytes_(_buffer.raw))
557 558 559 560 561 562 563

    # Otherwise, get randomness from urandom or the 'random' module.
    try:
        import os
        return UUID(bytes=os.urandom(16), version=4)
    except:
        import random
564
        bytes = bytes_(random.randrange(256) for i in range(16))
565 566 567 568
        return UUID(bytes=bytes, version=4)

def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
569
    from hashlib import sha1
570
    hash = sha1(namespace.bytes + bytes(name, "utf-8")).digest()
571
    return UUID(bytes=hash[:16], version=5)
572 573 574 575 576 577 578

# The following standard UUIDs are for use with uuid3() or uuid5().

NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')