340 lines
9.4 KiB
Cython
340 lines
9.4 KiB
Cython
import functools
|
|
import uuid
|
|
|
|
from libc.stdint cimport uint8_t, int8_t
|
|
from libc.string cimport memcpy, memcmp
|
|
|
|
|
|
# A more efficient UUID type implementation
|
|
# (6-7x faster than the standard uuid.UUID):
|
|
#
|
|
# -= Benchmark results (less is better): =-
|
|
#
|
|
# std_UUID(bytes): 1.2368
|
|
# c_UUID(bytes): * 0.1645 (7.52x)
|
|
# object(): 0.1483
|
|
#
|
|
# std_UUID(str): 1.8038
|
|
# c_UUID(str): * 0.2313 (7.80x)
|
|
#
|
|
# str(std_UUID()): 1.4625
|
|
# str(c_UUID()): * 0.2681 (5.46x)
|
|
# str(object()): 0.5975
|
|
#
|
|
# std_UUID().bytes: 0.3508
|
|
# c_UUID().bytes: * 0.1068 (3.28x)
|
|
#
|
|
# std_UUID().int: 0.0871
|
|
# c_UUID().int: * 0.0856
|
|
#
|
|
# std_UUID().hex: 0.4871
|
|
# c_UUID().hex: * 0.1405
|
|
#
|
|
# hash(std_UUID()): 0.3635
|
|
# hash(c_UUID()): * 0.1564 (2.32x)
|
|
#
|
|
# dct[std_UUID()]: 0.3319
|
|
# dct[c_UUID()]: * 0.1570 (2.11x)
|
|
#
|
|
# std_UUID() ==: 0.3478
|
|
# c_UUID() ==: * 0.0915 (3.80x)
|
|
|
|
|
|
# Lookup table indexed by byte value.  An entry holds the numeric value
# (0..15) of the hexadecimal digit that byte represents ('0'-'9',
# 'A'-'F', 'a'-'f'); every other byte maps to -1.
# Laid out as 16 rows of 16 entries; the row's first index is noted.
cdef char _hextable[256]
_hextable[:] = [
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x00
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x10
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x20
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,  # 0x30
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x40
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x50
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x60
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x70
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x80
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0x90
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xa0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xb0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xc0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xd0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xe0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  # 0xf0
]


# Module-level reference to the standard library UUID class.
cdef std_UUID = uuid.UUID
|
|
|
|
|
|
# Decode the textual UUID `u` into raw bytes written to `out`.
#
# Hexadecimal digits (either case) are consumed in pairs, the first digit
# of a pair forming the high nibble of the output byte.  '-' bytes are
# skipped wherever they appear.  At most 16 bytes are written to `out`,
# so the caller must provide a buffer of at least that size.
#
# Raises ValueError when:
#   * the UTF-8 form of `u` is shorter than 32 or longer than 36 bytes;
#   * a byte that is neither a hex digit nor '-' is encountered;
#   * the input decodes to more, or fewer, than 16 bytes.
cdef pg_uuid_bytes_from_str(str u, char *out):
    cdef:
        char *orig_buf
        Py_ssize_t size
        unsigned char ch
        uint8_t acc, part, acc_set
        int i, j

    orig_buf = <char*>cpythonx.PyUnicode_AsUTF8AndSize(u, &size)
    if size > 36 or size < 32:
        raise ValueError(
            f'invalid UUID {u!r}: '
            f'length must be between 32..36 characters, got {size}')

    # `acc` holds the pending high nibble; `acc_set` tells whether one is
    # pending.  `acc` is always assigned before it is read, the explicit
    # initialisation merely keeps that obvious.
    acc = 0
    acc_set = 0
    j = 0
    for i in range(size):
        ch = <unsigned char>orig_buf[i]
        if ch == <unsigned char>b'-':
            continue

        # Going through int8_t makes the -1 sentinel compare equal to
        # <uint8_t>-1 regardless of whether plain `char` is signed.
        part = <uint8_t><int8_t>_hextable[ch]
        if part == <uint8_t>-1:
            if ch >= 0x20 and ch <= 0x7e:
                raise ValueError(
                    f'invalid UUID {u!r}: unexpected character {chr(ch)!r}')
            else:
                # Non-printable / non-ASCII byte: report the input only.
                # (The f-prefix was missing here, so the message used to
                # contain the literal text "{u!r}".)
                raise ValueError(
                    f'invalid UUID {u!r}: unexpected character')

        if acc_set:
            acc |= part
            out[j] = <char>acc
            acc_set = 0
            j += 1
        else:
            acc = <uint8_t>(part << 4)
            acc_set = 1

        # Checked on every iteration so that a 17th byte is rejected
        # before anything is written past out[15].
        if j > 16 or (j == 16 and acc_set):
            raise ValueError(
                f'invalid UUID {u!r}: decodes to more than 16 bytes')

    if j != 16:
        raise ValueError(
            f'invalid UUID {u!r}: decodes to less than 16 bytes')
|
|
|
|
|
|
# Empty placeholder base class for UUID.  The module-level code after the
# UUID class definition swaps it for uuid.UUID in UUID.__bases__ and
# UUID.__mro__.
cdef class __UUIDReplaceMe:
    pass
|
|
|
|
|
|
# Create a UUID whose value is the 16 bytes starting at `buf`.
# The instance is allocated through __new__, so UUID.__init__ is not run;
# the raw bytes are copied straight into the object's storage.
cdef pg_uuid_from_buf(const char *buf):
    cdef UUID result = UUID.__new__(UUID)
    memcpy(result._data, buf, 16)
    return result
|
|
|
|
|
|
@cython.final
@cython.no_gc_clear
cdef class UUID(__UUIDReplaceMe):
    # UUID value kept as 16 raw bytes, exposing the same attributes as
    # uuid.UUID.  The integer form and the hash are computed on first use
    # and cached on the instance.

    cdef:
        char _data[16]
        object _int
        object _hash
        object __weakref__

    def __cinit__(self):
        # Caches start empty; they are filled lazily by `int` / `__hash__`.
        self._int = None
        self._hash = None

    def __init__(self, inp):
        # Accepts either a str (textual UUID) or a bytes object of
        # exactly 16 bytes.  Raises ValueError for a bytes object of the
        # wrong length and TypeError for any other input type.
        cdef:
            char *raw
            Py_ssize_t raw_len

        if cpython.PyUnicode_Check(inp):
            pg_uuid_bytes_from_str(inp, self._data)
            return

        if not cpython.PyBytes_Check(inp):
            raise TypeError(f'a bytes or str object expected, got {inp!r}')

        cpython.PyBytes_AsStringAndSize(inp, &raw, &raw_len)
        if raw_len != 16:
            raise ValueError(f'16 bytes were expected, got {raw_len}')
        memcpy(self._data, raw, 16)

    @property
    def bytes(self):
        # The 16 raw bytes as a new bytes object.
        return cpython.PyBytes_FromStringAndSize(self._data, 16)

    @property
    def int(self):
        # Big-endian integer value of the 16 bytes.  Cached, because it
        # is reused by __hash__, the field accessors and comparisons
        # against uuid.UUID.
        if self._int is not None:
            return self._int

        self._int = int.from_bytes(self.bytes, 'big')
        return self._int

    @property
    def is_safe(self):
        return uuid.SafeUUID.unknown

    def __str__(self):
        # 36-character textual form produced by tohex.uuid_to_str.
        cdef char text[36]
        tohex.uuid_to_str(self._data, text)
        return cpythonx.PyUnicode_FromKindAndData(
            cpythonx.PyUnicode_1BYTE_KIND, <void*>text, 36)

    @property
    def hex(self):
        # 32-character textual form produced by tohex.uuid_to_hex.
        cdef char text[32]
        tohex.uuid_to_hex(self._data, text)
        return cpythonx.PyUnicode_FromKindAndData(
            cpythonx.PyUnicode_1BYTE_KIND, <void*>text, 32)

    def __repr__(self):
        return f"UUID('{self}')"

    def __reduce__(self):
        # Pickle as "call the class with the raw bytes".
        cls = type(self)
        return cls, (self.bytes,)

    # Rich comparisons.  Two instances of this class are compared with a
    # memcmp over the raw bytes; a uuid.UUID operand is compared through
    # the integer values; anything else yields NotImplemented.

    def __eq__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff == 0
        elif isinstance(other, std_UUID):
            return self.int == other.int
        else:
            return NotImplemented

    def __ne__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff != 0
        elif isinstance(other, std_UUID):
            return self.int != other.int
        else:
            return NotImplemented

    def __lt__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff < 0
        elif isinstance(other, std_UUID):
            return self.int < other.int
        else:
            return NotImplemented

    def __gt__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff > 0
        elif isinstance(other, std_UUID):
            return self.int > other.int
        else:
            return NotImplemented

    def __le__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff <= 0
        elif isinstance(other, std_UUID):
            return self.int <= other.int
        else:
            return NotImplemented

    def __ge__(self, other):
        cdef int diff
        if type(other) is UUID:
            diff = memcmp(self._data, (<UUID>other)._data, 16)
            return diff >= 0
        elif isinstance(other, std_UUID):
            return self.int >= other.int
        else:
            return NotImplemented

    def __hash__(self):
        # In EdgeDB every schema object has a uuid and there are
        # huge hash-maps of them, so the hash is computed once and
        # then served from the cache.
        if self._hash is None:
            self._hash = hash(self.int)
        return self._hash

    def __int__(self):
        return self.int

    @property
    def bytes_le(self):
        # Raw bytes with each of the first three fields (4, 2 and 2
        # bytes) reversed; the remaining 8 bytes are unchanged.
        raw = self.bytes
        return raw[3::-1] + raw[5:3:-1] + raw[7:5:-1] + raw[8:]

    @property
    def fields(self):
        return (
            self.time_low,
            self.time_mid,
            self.time_hi_version,
            self.clock_seq_hi_variant,
            self.clock_seq_low,
            self.node,
        )

    @property
    def time_low(self):
        return self.int >> 96

    @property
    def time_mid(self):
        return (self.int >> 80) & 0xffff

    @property
    def time_hi_version(self):
        return (self.int >> 64) & 0xffff

    @property
    def clock_seq_hi_variant(self):
        return (self.int >> 56) & 0xff

    @property
    def clock_seq_low(self):
        return (self.int >> 48) & 0xff

    @property
    def time(self):
        # Timestamp assembled from the three time fields, with the top
        # four bits of time_hi_version masked off.
        high = (self.time_hi_version & 0x0fff) << 48
        middle = self.time_mid << 32
        return high | middle | self.time_low

    @property
    def clock_seq(self):
        # Clock sequence: low six bits of clock_seq_hi_variant followed
        # by clock_seq_low.
        high = (self.clock_seq_hi_variant & 0x3f) << 8
        return high | self.clock_seq_low

    @property
    def node(self):
        return self.int & 0xffffffffffff

    @property
    def urn(self):
        return 'urn:uuid:' + str(self)

    @property
    def variant(self):
        # Decided by the first clear bit among the top three bits of the
        # clock_seq_hi_variant octet, tested from the most significant.
        value = self.int
        if not value & (0x8000 << 48):
            return uuid.RESERVED_NCS
        if not value & (0x4000 << 48):
            return uuid.RFC_4122
        if not value & (0x2000 << 48):
            return uuid.RESERVED_MICROSOFT
        return uuid.RESERVED_FUTURE

    @property
    def version(self):
        # The version bits are only meaningful for RFC 4122 UUIDs;
        # for every other variant the result is None.
        if self.variant != uuid.RFC_4122:
            return None
        return int((self.int >> 76) & 0xf)
|
|
|
|
|
|
# <hack>
|
|
# In order for `isinstance(pgproto.UUID, uuid.UUID)` to work,
|
|
# patch __bases__ and __mro__ by injecting `uuid.UUID`.
|
|
#
|
|
# We apply brute-force here because the following pattern stopped
|
|
# working with Python 3.8:
|
|
#
|
|
# cdef class OurUUID:
|
|
# ...
|
|
#
|
|
# class UUID(OurUUID, uuid.UUID):
|
|
# ...
|
|
#
|
|
# With Python 3.8 it now produces
|
|
#
|
|
# "TypeError: multiple bases have instance lay-out conflict"
|
|
#
|
|
# error. Maybe it's possible to fix this some other way, but
|
|
# the best solution possible would be to just contribute our
|
|
# faster UUID to the standard library and not have this problem
|
|
# at all. For now this hack is pretty safe and should be
|
|
# compatible with future Pythons for long enough.
|
|
#
|
|
# Make sure the placeholder base sits exactly in the slots that are
# about to be overwritten.
assert UUID.__bases__[0] is __UUIDReplaceMe
assert UUID.__mro__[1] is __UUIDReplaceMe
# PyTuple_SET_ITEM steals a reference to the item it stores, so an extra
# reference to std_UUID is taken before each in-place replacement.
cpython.Py_INCREF(std_UUID)
cpython.PyTuple_SET_ITEM(UUID.__bases__, 0, std_UUID)
cpython.Py_INCREF(std_UUID)
cpython.PyTuple_SET_ITEM(UUID.__mro__, 1, std_UUID)
|
|
# </hack>
|
|
|
|
|
|
# Module-level reference to the UUID class defined above.
cdef pg_UUID = UUID
|