# Source path: stormbrigade_sheriff/sbsheriff/Lib/site-packages/asyncpg/protocol/codecs/array.pyx
# (Cython source; 876 lines, 29 KiB)

# Copyright (C) 2016-present the asyncpg authors and contributors
# <see AUTHORS file>
#
# This module is part of asyncpg and is released under
# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0
from collections.abc import (Iterable as IterableABC,
Mapping as MappingABC,
Sized as SizedABC)
from asyncpg import exceptions
# Upper bound on the number of array dimensions; it sizes every
# fixed-length dims/strides/indexes C array in this module.
DEF ARRAY_MAXDIM = 6  # defined in postgresql/src/include/c.h (MAXDIM)

# "NULL" as a NUL-terminated sequence of UCS4 code points; unquoted
# text-array elements are compared against it when decoding.
cdef Py_UCS4 *APG_NULL = [0x004E, 0x0055, 0x004C, 0x004C, 0x0000]

# Element encoder callback: encodes `obj` into `buf`.  `arg` is an
# opaque pointer handed through unchanged by the array encoders.
ctypedef object (*encode_func_ex)(ConnectionSettings settings,
                                  WriteBuffer buf,
                                  object obj,
                                  const void *arg)

# Element decoder callback: decodes one element from `buf`.  `arg` is an
# opaque pointer handed through unchanged by the array decoders.
ctypedef object (*decode_func_ex)(ConnectionSettings settings,
                                  FRBuffer *buf,
                                  const void *arg)
# Return true for str, bytes, bytearray and memoryview objects, which
# _is_array_iterable() refuses to treat as arrays.
cdef inline bint _is_trivial_container(object obj):
    if cpython.PyUnicode_Check(obj):
        return True
    if cpython.PyBytes_Check(obj):
        return True
    if cpythonx.PyByteArray_Check(obj):
        return True
    return cpythonx.PyMemoryView_Check(obj)
# Return True when `obj` can be encoded as an array: it must be a sized
# iterable that is neither a string-like container nor a mapping.
cdef inline _is_array_iterable(object obj):
    if not isinstance(obj, IterableABC):
        return False
    if not isinstance(obj, SizedABC):
        return False
    if _is_trivial_container(obj):
        return False
    return not isinstance(obj, MappingABC)
# Like _is_array_iterable(), but additionally rejects tuples: a tuple
# nested inside an array is treated as a record, not as a sub-array.
cdef inline _is_sub_array_iterable(object obj):
    if not _is_array_iterable(obj):
        return False
    return not cpython.PyTuple_Check(obj)
# Determine the shape of the (possibly nested) container `obj`.
#
# `ndims[0]` is the 1-based depth of `obj` on entry; the length of `obj`
# is stored in `dims[ndims[0] - 1]`.  When the elements are sub-arrays,
# `ndims[0]` is incremented and the first sub-array is inspected
# recursively; the remaining sub-arrays are only compared by length.
#
# Raises ValueError if a length exceeds _MAXINT32, if the nesting is
# deeper than ARRAY_MAXDIM, or if sub-arrays and scalars (or sub-arrays
# of different lengths) are mixed at the same level.
cdef _get_array_shape(object obj, int32_t *dims, int32_t *ndims):
    cdef:
        ssize_t outer_len = len(obj)
        # -2: no element seen yet; -1: elements are scalars;
        # >= 0: elements are sub-arrays of this length.
        ssize_t sub_len = -2

    if outer_len > _MAXINT32:
        raise ValueError('too many elements in array value')

    if ndims[0] > ARRAY_MAXDIM:
        raise ValueError(
            'number of array dimensions ({}) exceed the maximum expected ({})'.
            format(ndims[0], ARRAY_MAXDIM))

    dims[ndims[0] - 1] = <int32_t>outer_len

    for elem in obj:
        if not _is_sub_array_iterable(elem):
            # A scalar element is only valid if no sub-array was seen.
            if sub_len >= 0:
                raise ValueError('non-homogeneous array')
            sub_len = -1
            continue

        if sub_len != -2:
            # Not the first element: it must match the established
            # sub-array length (this also rejects scalar-then-array).
            if len(elem) != sub_len:
                raise ValueError('non-homogeneous array')
            continue

        # First element and it is a sub-array: descend one level.
        sub_len = len(elem)
        if sub_len > _MAXINT32:
            raise ValueError('too many elements in array value')
        ndims[0] += 1
        _get_array_shape(elem, dims, ndims)
# Recursively write the elements of a multi-dimensional array into
# `elem_data`.  `dim` is the zero-based depth of `obj`; elements are
# encoded only at the innermost dimension (dim == ndims - 1).  None is
# written as a length of -1; a TypeError from the element encoder is
# re-raised as ValueError.
cdef _write_array_data(ConnectionSettings settings, object obj, int32_t ndims,
                       int32_t dim, WriteBuffer elem_data,
                       encode_func_ex encoder, const void *encoder_arg):
    if dim < ndims - 1:
        # Not at the innermost dimension yet: descend into each sub-array.
        for sub_array in obj:
            _write_array_data(settings, sub_array, ndims, dim + 1, elem_data,
                              encoder, encoder_arg)
        return

    for item in obj:
        if item is None:
            elem_data.write_int32(-1)
            continue

        try:
            encoder(settings, elem_data, item, encoder_arg)
        except TypeError as e:
            raise ValueError(
                'invalid array element: {}'.format(e.args[0])) from None
# Encode `obj` as a binary array value into `buf`.
#
# The value is written as: total length, number of dimensions, flags
# (always 0), element type OID, then for each dimension its size and a
# lower bound of 1, followed by the encoded elements.
#
# Raises TypeError if `obj` is not a sized iterable container and
# ValueError if an element is rejected by `encoder` with a TypeError.
cdef inline array_encode(ConnectionSettings settings, WriteBuffer buf,
                         object obj, uint32_t elem_oid,
                         encode_func_ex encoder, const void *encoder_arg):
    cdef:
        WriteBuffer payload
        int32_t dims[ARRAY_MAXDIM]
        int32_t ndims = 1
        int32_t idx

    if not _is_array_iterable(obj):
        raise TypeError(
            'a sized iterable container expected (got type {!r})'.format(
                type(obj).__name__))

    _get_array_shape(obj, dims, &ndims)

    payload = WriteBuffer.new()

    if ndims <= 1:
        # Flat array: encode inline so that errors can report the index.
        for idx, item in enumerate(obj):
            if item is None:
                payload.write_int32(-1)
                continue

            try:
                encoder(settings, payload, item, encoder_arg)
            except TypeError as e:
                raise ValueError(
                    'invalid array element at index {}: {}'.format(
                        idx, e.args[0])) from None
    else:
        _write_array_data(settings, obj, ndims, 0, payload,
                          encoder, encoder_arg)

    # Header: 12 bytes of fixed fields plus 8 bytes per dimension.
    buf.write_int32(12 + 8 * ndims + payload.len())
    # Number of dimensions
    buf.write_int32(ndims)
    # flags
    buf.write_int32(0)
    # element type
    buf.write_int32(<int32_t>elem_oid)
    # upper / lower bounds
    for idx in range(ndims):
        buf.write_int32(dims[idx])
        buf.write_int32(1)
    # element data
    buf.write_buffer(payload)
# Recursively write `obj` as a brace-delimited text array literal into
# `array_data`.
#
# `dim` is the zero-based depth of `obj`.  Above the innermost dimension
# each item is written as a nested literal; at the innermost dimension
# each item is encoded with `encoder` and then quoted/escaped as needed:
#   * None is written as the bare word NULL;
#   * an empty encoding is written as "";
#   * an element spelling "NULL" (any case) is wrapped in quotes;
#   * elements containing quotes, backslashes, braces, the delimiter or
#     whitespace are quoted, with quotes and backslashes backslash-escaped.
# Items are separated by `typdelim` followed by a space.
#
# Raises ValueError if `encoder` rejects an element with a TypeError.
cdef _write_textarray_data(ConnectionSettings settings, object obj,
                           int32_t ndims, int32_t dim, WriteBuffer array_data,
                           encode_func_ex encoder, const void *encoder_arg,
                           Py_UCS4 typdelim):
    cdef:
        # Number of items written so far at this level; only used to
        # decide whether a delimiter must precede the next item.
        ssize_t i = 0
        # Byte index used when scanning an encoded element.  Kept separate
        # from `i` so that the scans cannot disturb the item counter.
        ssize_t j
        int8_t delim = <int8_t>typdelim
        WriteBuffer elem_data
        Py_buffer pybuf
        const char *elem_str
        char ch
        ssize_t elem_len
        ssize_t quoted_elem_len
        bint need_quoting

    array_data.write_byte(b'{')

    if dim < ndims - 1:
        for item in obj:
            if i > 0:
                array_data.write_byte(delim)
                array_data.write_byte(b' ')
            _write_textarray_data(settings, item, ndims, dim + 1, array_data,
                                  encoder, encoder_arg, typdelim)
            i += 1
    else:
        for item in obj:
            elem_data = WriteBuffer.new()

            if i > 0:
                array_data.write_byte(delim)
                array_data.write_byte(b' ')

            if item is None:
                array_data.write_bytes(b'NULL')
                i += 1
                continue
            else:
                try:
                    encoder(settings, elem_data, item, encoder_arg)
                except TypeError as e:
                    raise ValueError(
                        'invalid array element: {}'.format(
                            e.args[0])) from None

            # element string length (first four bytes are the encoded length.)
            elem_len = elem_data.len() - 4

            if elem_len == 0:
                # Empty string
                array_data.write_bytes(b'""')
            else:
                cpython.PyObject_GetBuffer(
                    elem_data, &pybuf, cpython.PyBUF_SIMPLE)

                # Skip the four-byte length prefix.
                elem_str = <const char*>(pybuf.buf) + 4

                try:
                    if not apg_strcasecmp_char(elem_str, b'NULL'):
                        # A literal "NULL" string must be quoted so that it
                        # is not read back as a NULL element.
                        array_data.write_byte(b'"')
                        array_data.write_cstr(elem_str, 4)
                        array_data.write_byte(b'"')
                    else:
                        quoted_elem_len = elem_len
                        need_quoting = False

                        # First pass: decide whether quoting and/or
                        # escaping is required.
                        for j in range(elem_len):
                            ch = elem_str[j]
                            if ch == b'"' or ch == b'\\':
                                # Quotes and backslashes need escaping.
                                quoted_elem_len += 1
                                need_quoting = True
                            elif (ch == b'{' or ch == b'}' or ch == delim or
                                    apg_ascii_isspace(<uint32_t>ch)):
                                need_quoting = True

                        if need_quoting:
                            array_data.write_byte(b'"')

                            if quoted_elem_len == elem_len:
                                # Quoting only; nothing to escape.
                                array_data.write_cstr(elem_str, elem_len)
                            else:
                                # Escaping required.
                                for j in range(elem_len):
                                    ch = elem_str[j]
                                    if ch == b'"' or ch == b'\\':
                                        array_data.write_byte(b'\\')
                                    array_data.write_byte(ch)

                            array_data.write_byte(b'"')
                        else:
                            array_data.write_cstr(elem_str, elem_len)
                finally:
                    cpython.PyBuffer_Release(&pybuf)

            i += 1

    array_data.write_byte(b'}')
# Encode `obj` as a text-format array literal into `buf`, preceded by
# the literal's length.
#
# Raises TypeError if `obj` is not a sized iterable container; errors
# from shape detection and element encoding propagate unchanged.
cdef inline textarray_encode(ConnectionSettings settings, WriteBuffer buf,
                             object obj, encode_func_ex encoder,
                             const void *encoder_arg, Py_UCS4 typdelim):
    cdef:
        WriteBuffer literal
        int32_t dims[ARRAY_MAXDIM]
        int32_t ndims = 1

    if not _is_array_iterable(obj):
        raise TypeError(
            'a sized iterable container expected (got type {!r})'.format(
                type(obj).__name__))

    # Only the number of dimensions is needed here; the per-dimension
    # sizes are implied by the brace structure of the literal.
    _get_array_shape(obj, dims, &ndims)

    literal = WriteBuffer.new()
    _write_textarray_data(settings, obj, ndims, 0, literal,
                          encoder, encoder_arg, typdelim)

    buf.write_int32(literal.len())
    buf.write_buffer(literal)
# Decode a binary array value from `buf` into a (possibly nested) list.
#
# Reads the header (dimension count, flags, element OID, then size and
# lower bound per dimension) followed by the elements.  An element
# length of -1 decodes to None.  Lower bounds are read and discarded.
#
# Raises exceptions.ProtocolError for a negative or too large dimension
# count and for a negative dimension size.
cdef inline array_decode(ConnectionSettings settings, FRBuffer *buf,
                         decode_func_ex decoder, const void *decoder_arg):
    cdef:
        # Header fields, consumed from the buffer in wire order.  `flags`
        # and `elem_oid` are not used further but must still be read.
        int32_t ndims = hton.unpack_int32(frb_read(buf, 4))
        int32_t flags = hton.unpack_int32(frb_read(buf, 4))
        uint32_t elem_oid = <uint32_t>hton.unpack_int32(frb_read(buf, 4))
        list result
        int i
        int32_t elem_len
        int32_t elem_count = 1
        FRBuffer elem_buf
        int32_t dims[ARRAY_MAXDIM]

    if ndims == 0:
        return []

    if ndims < 0:
        raise exceptions.ProtocolError(
            'unexpected array dimensions value: {}'.format(ndims))

    if ndims > ARRAY_MAXDIM:
        raise exceptions.ProtocolError(
            'number of array dimensions ({}) exceed the maximum expected ({})'.
            format(ndims, ARRAY_MAXDIM))

    for i in range(ndims):
        dims[i] = hton.unpack_int32(frb_read(buf, 4))
        if dims[i] < 0:
            raise exceptions.ProtocolError(
                'unexpected array dimension size: {}'.format(dims[i]))
        # Ignore the lower bound information
        frb_read(buf, 4)

    if ndims != 1:
        result = _nested_array_decode(settings, buf,
                                      decoder, decoder_arg, ndims, dims,
                                      &elem_buf)
        return result

    # Fast path for flat arrays
    elem_count = dims[0]
    result = cpython.PyList_New(elem_count)
    for i in range(elem_count):
        elem_len = hton.unpack_int32(frb_read(buf, 4))
        if elem_len == -1:
            elem = None
        else:
            frb_slice_from(&elem_buf, buf, elem_len)
            elem = decoder(settings, &elem_buf, decoder_arg)

        # PyList_SET_ITEM takes over a reference, so provide one.
        cpython.Py_INCREF(elem)
        cpython.PyList_SET_ITEM(result, i, elem)

    return result
# Decode the elements of a multi-dimensional binary array into nested
# lists.
#
# `dims[0..ndims-1]` hold the size of each dimension.  Elements are read
# from `buf` one after another; `elem_buf` is scratch space used to slice
# each element out of `buf` for `decoder`.  An element length of -1
# decodes to None.
#
# Returns [] when any dimension has size zero and raises
# exceptions.ProtocolError when the product of the dimension sizes
# comes out negative.
cdef _nested_array_decode(ConnectionSettings settings,
                          FRBuffer *buf,
                          decode_func_ex decoder,
                          const void *decoder_arg,
                          int32_t ndims, int32_t *dims,
                          FRBuffer *elem_buf):

    cdef:
        int32_t elem_len
        int64_t i, j
        # Total number of elements: the product of all dimension sizes.
        int64_t array_len = 1
        object elem, stride
        # An array of pointers to lists for each current array level.
        void *strides[ARRAY_MAXDIM]
        # An array of current positions at each array level.
        int32_t indexes[ARRAY_MAXDIM]

    for i in range(ndims):
        array_len *= dims[i]
        indexes[i] = 0
        strides[i] = NULL

    if array_len == 0:
        # A multidimensional array with a zero-sized dimension?
        return []

    elif array_len < 0:
        # Array length overflow
        raise exceptions.ProtocolError('array length overflow')

    for i in range(array_len):
        # Decode the element.
        elem_len = hton.unpack_int32(frb_read(buf, 4))
        if elem_len == -1:
            elem = None
        else:
            elem = decoder(settings,
                           frb_slice_from(elem_buf, buf, elem_len),
                           decoder_arg)

        # Take an explicit reference, since PyList_SET_ITEM in the
        # loop below expects one.
        cpython.Py_INCREF(elem)

        # Iterate over array dimensions, innermost first, and put the
        # element in the correctly nested sublist.
        for j in reversed(range(ndims)):
            if indexes[j] == 0:
                # Allocate the list for this array level.
                stride = cpython.PyList_New(dims[j])

                strides[j] = <void*><cpython.PyObject>stride
                # Take an explicit reference, since PyList_SET_ITEM
                # below expects one.
                cpython.Py_INCREF(stride)

            stride = <object><cpython.PyObject*>strides[j]
            cpython.PyList_SET_ITEM(stride, indexes[j], elem)
            indexes[j] += 1

            if indexes[j] == dims[j] and j != 0:
                # This array level is full, continue the
                # ascent in the dimensions so that this level
                # sublist will be appended to the parent list.
                elem = stride
                # Reset the index, this will cause the
                # new list to be allocated on the next
                # iteration on this array axis.
                indexes[j] = 0
            else:
                break

    stride = <object><cpython.PyObject*>strides[0]
    # Since each element in strides has a refcount of 1,
    # returning strides[0] will increment it to 2, so
    # balance that.
    cpython.Py_DECREF(stride)
    return stride
# Decode a text-format array literal from `buf` into a (possibly nested)
# list, decoding each element with `decoder`.
#
# A ValueError raised while parsing the literal is re-raised as
# exceptions.ProtocolError carrying the offending literal.
cdef textarray_decode(ConnectionSettings settings, FRBuffer *buf,
                      decode_func_ex decoder, const void *decoder_arg,
                      Py_UCS4 typdelim):
    cdef:
        Py_UCS4 *ucs4_copy
        str literal

    literal = pgproto.text_decode(settings, buf)
    # The parser rewrites its input in place while stripping quoting, so
    # give it a private UCS4 copy of the literal.
    ucs4_copy = cpythonx.PyUnicode_AsUCS4Copy(literal)

    try:
        try:
            decoded = _textarray_decode(
                settings, ucs4_copy, decoder, decoder_arg, typdelim)
        except ValueError as e:
            raise exceptions.ProtocolError(
                'malformed array literal {!r}: {}'.format(
                    literal, e.args[0]))
    finally:
        cpython.PyMem_Free(ucs4_copy)

    return decoded
# Parse the NUL-terminated UCS4 array literal `array_text` and return it
# as a (possibly nested) list of elements decoded with `decoder`.
#
# The literal may start with an explicit dimension specification such as
# "[1:3][1:2]=", which is parsed and checked against the dimensions
# inferred from the brace structure by _infer_array_dims().  The lower
# bounds are only used to compute the dimension sizes.
#
# NOTE: `array_text` is modified in place: quoting and backslash escapes
# are folded and a NUL terminator is written after each element.
#
# Unquoted elements spelling NULL (any case) decode to None.
#
# Raises ValueError on a malformed literal, including a dimension
# specification with more than ARRAY_MAXDIM entries.
cdef _textarray_decode(ConnectionSettings settings,
                       Py_UCS4 *array_text,
                       decode_func_ex decoder,
                       const void *decoder_arg,
                       Py_UCS4 typdelim):

    cdef:
        list result
        list new_stride
        Py_UCS4 *ptr
        int32_t ndims = 0
        int32_t ubound = 0
        int32_t lbound = 0
        int32_t dims[ARRAY_MAXDIM]
        int32_t inferred_dims[ARRAY_MAXDIM]
        int32_t inferred_ndims = 0
        # Borrowed pointers to the list being filled at each nesting level.
        void *strides[ARRAY_MAXDIM]
        # Next free position in the list at each nesting level.
        int32_t indexes[ARRAY_MAXDIM]
        int32_t nest_level = 0
        int32_t item_level = 0
        bint end_of_array = False

        bint end_of_item = False
        bint has_quoting = False
        bint strip_spaces = False
        bint in_quotes = False
        Py_UCS4 *item_start
        Py_UCS4 *item_ptr
        Py_UCS4 *item_end

        int i
        object item
        str item_text
        FRBuffer item_buf
        char *pg_item_str
        ssize_t pg_item_len

    ptr = array_text

    # Parse the optional explicit dimension specification.
    while True:
        while apg_ascii_isspace(ptr[0]):
            ptr += 1

        if ptr[0] != '[':
            # Finished parsing dimensions spec.
            break

        ptr += 1  # '['

        # `dims` has exactly ARRAY_MAXDIM slots, so refuse the spec
        # *before* it would be stored at dims[ARRAY_MAXDIM].
        if ndims >= ARRAY_MAXDIM:
            raise ValueError(
                'number of array dimensions ({}) exceed the '
                'maximum expected ({})'.format(ndims + 1, ARRAY_MAXDIM))

        ptr = apg_parse_int32(ptr, &ubound)
        if ptr == NULL:
            raise ValueError('missing array dimension value')

        if ptr[0] == ':':
            ptr += 1
            lbound = ubound

            # [lower:upper] spec.  We disregard the lbound for decoding.
            ptr = apg_parse_int32(ptr, &ubound)
            if ptr == NULL:
                raise ValueError('missing array dimension value')
        else:
            lbound = 1

        if ptr[0] != ']':
            raise ValueError('missing \']\' after array dimensions')

        ptr += 1  # ']'

        dims[ndims] = ubound - lbound + 1
        ndims += 1

    if ndims != 0:
        # If dimensions were given, the '=' token is expected.
        if ptr[0] != '=':
            raise ValueError('missing \'=\' after array dimensions')

        ptr += 1  # '='

        # Skip any whitespace after the '=', whitespace
        # before was consumed in the above loop.
        while apg_ascii_isspace(ptr[0]):
            ptr += 1

        # Infer the dimensions from the brace structure in the
        # array literal body, and check that it matches the explicit
        # spec.  This also validates that the array literal is sane.
        _infer_array_dims(ptr, typdelim, inferred_dims, &inferred_ndims)

        if inferred_ndims != ndims:
            raise ValueError(
                'specified array dimensions do not match array content')

        for i in range(ndims):
            if inferred_dims[i] != dims[i]:
                raise ValueError(
                    'specified array dimensions do not match array content')
    else:
        # Infer the dimensions from the brace structure in the array literal
        # body.  This also validates that the array literal is sane.
        _infer_array_dims(ptr, typdelim, dims, &ndims)

    while not end_of_array:
        # We iterate over the literal character by character
        # and modify the string in-place removing the array-specific
        # quoting and determining the boundaries of each element.
        end_of_item = has_quoting = in_quotes = False
        strip_spaces = True

        # Pointers to array element start, end, and the current pointer
        # tracking the position where characters are written when
        # escaping is folded.
        item_start = item_end = item_ptr = ptr
        item_level = 0

        while not end_of_item:
            if ptr[0] == '"':
                in_quotes = not in_quotes
                if in_quotes:
                    strip_spaces = False
                else:
                    item_end = item_ptr
                has_quoting = True

            elif ptr[0] == '\\':
                # Quoted character, collapse the backslash.
                ptr += 1
                has_quoting = True
                item_ptr[0] = ptr[0]
                item_ptr += 1
                strip_spaces = False
                item_end = item_ptr

            elif in_quotes:
                # Consume the string until we see the closing quote.
                item_ptr[0] = ptr[0]
                item_ptr += 1

            elif ptr[0] == '{':
                # Nesting level increase.
                nest_level += 1

                indexes[nest_level - 1] = 0
                new_stride = cpython.PyList_New(dims[nest_level - 1])
                strides[nest_level - 1] = \
                    <void*>(<cpython.PyObject>new_stride)

                if nest_level > 1:
                    # PyList_SET_ITEM takes over a reference, so
                    # provide one for the parent list to own.
                    cpython.Py_INCREF(new_stride)
                    cpython.PyList_SET_ITEM(
                        <object><cpython.PyObject*>strides[nest_level - 2],
                        indexes[nest_level - 2],
                        new_stride)
                else:
                    result = new_stride

            elif ptr[0] == '}':
                if item_level == 0:
                    # Make sure we keep track of which nesting
                    # level the item belongs to, as the loop
                    # will continue to consume closing braces
                    # until the delimiter or the end of input.
                    item_level = nest_level

                nest_level -= 1

                if nest_level == 0:
                    end_of_array = end_of_item = True

            elif ptr[0] == typdelim:
                # Array element delimiter,
                end_of_item = True
                if item_level == 0:
                    item_level = nest_level

            elif apg_ascii_isspace(ptr[0]):
                if not strip_spaces:
                    item_ptr[0] = ptr[0]
                    item_ptr += 1
                # Ignore the leading literal whitespace.

            else:
                item_ptr[0] = ptr[0]
                item_ptr += 1
                strip_spaces = False
                item_end = item_ptr

            ptr += 1

        # end while not end_of_item

        if item_end == item_start:
            # Empty array
            continue

        # Terminate the element in place (trailing unquoted whitespace
        # lies beyond item_end and is thereby dropped).
        item_end[0] = '\0'

        if not has_quoting and apg_strcasecmp(item_start, APG_NULL) == 0:
            # NULL element.
            item = None
        else:
            # XXX: find a way to avoid the redundant encode/decode
            # cycle here.
            item_text = cpythonx.PyUnicode_FromKindAndData(
                cpythonx.PyUnicode_4BYTE_KIND,
                <void *>item_start,
                item_end - item_start)

            # Prepare the element buffer and call the text decoder
            # for the element type.
            pgproto.as_pg_string_and_size(
                settings, item_text, &pg_item_str, &pg_item_len)
            frb_init(&item_buf, pg_item_str, pg_item_len)
            item = decoder(settings, &item_buf, decoder_arg)

        # Place the decoded element in the array.
        cpython.Py_INCREF(item)
        cpython.PyList_SET_ITEM(
            <object><cpython.PyObject*>strides[item_level - 1],
            indexes[item_level - 1],
            item)

        if nest_level > 0:
            indexes[nest_level - 1] += 1

    return result
# Parser states used by _infer_array_dims() to validate the structure
# of a text array literal.
cdef enum _ArrayParseState:
    APS_START = 1             # initial state, nothing parsed yet
    APS_STRIDE_STARTED = 2    # an opening '{' was just seen
    APS_STRIDE_DONE = 3       # a closing '}' was just seen
    APS_STRIDE_DELIMITED = 4  # a delimiter was seen after a '}'
    APS_ELEM_STARTED = 5      # inside an element
    APS_ELEM_DELIMITED = 6    # a delimiter was seen after an element
# Build (but do not raise) a ValueError describing the unexpected
# character at `ptr`; the reported position is 1-based relative to
# `array_text`.
cdef _UnexpectedCharacter(const Py_UCS4 *array_text, const Py_UCS4 *ptr):
    offending = cpython.PyUnicode_FromOrdinal(<int>ptr[0])
    position = ptr - array_text + 1
    return ValueError(
        'unexpected character {!r} at position {}'.format(
            offending, position))
# Validate the brace structure of the text array literal `array_text`
# and infer its shape from it.
#
# On return `ndims[0]` holds the number of dimensions (0 for an empty
# array) and `dims` the inferred dimension sizes.  `ndims[0]` is only
# ever raised here, so the caller must initialize it to 0.  The input
# is not modified.
#
# Raises ValueError on unexpected characters, premature end of input,
# nesting deeper than ARRAY_MAXDIM, sub-arrays of inconsistent length,
# and non-whitespace after the closing brace.
cdef _infer_array_dims(const Py_UCS4 *array_text,
                       Py_UCS4 typdelim,
                       int32_t *dims,
                       int32_t *ndims):
    cdef:
        const Py_UCS4 *ptr = array_text
        int i
        int nest_level = 0
        bint end_of_array = False
        bint end_of_item = False
        bint in_quotes = False
        bint array_is_empty = True
        # Number of items seen so far in the open sub-array at each level
        # (starts at 1 and is bumped on every delimiter).
        int stride_len[ARRAY_MAXDIM]
        # Item count of the previously closed sub-array at each level,
        # or 0 if none has been closed yet.
        int prev_stride_len[ARRAY_MAXDIM]
        _ArrayParseState parse_state = APS_START

    for i in range(ARRAY_MAXDIM):
        dims[i] = prev_stride_len[i] = 0
        stride_len[i] = 1

    while not end_of_array:
        end_of_item = False

        while not end_of_item:
            if ptr[0] == '\0':
                raise ValueError('unexpected end of string')

            elif ptr[0] == '"':
                # A quote may open an element (after '{' or a delimiter)
                # or close the element that is currently quoted.
                if (parse_state not in (APS_STRIDE_STARTED,
                                        APS_ELEM_DELIMITED) and
                        not (parse_state == APS_ELEM_STARTED and in_quotes)):
                    raise _UnexpectedCharacter(array_text, ptr)

                in_quotes = not in_quotes
                if in_quotes:
                    parse_state = APS_ELEM_STARTED
                    array_is_empty = False

            elif ptr[0] == '\\':
                if parse_state not in (APS_STRIDE_STARTED,
                                       APS_ELEM_STARTED,
                                       APS_ELEM_DELIMITED):
                    raise _UnexpectedCharacter(array_text, ptr)

                parse_state = APS_ELEM_STARTED
                array_is_empty = False

                # Skip the escaped character; a trailing backslash
                # is an error.
                if ptr[1] != '\0':
                    ptr += 1
                else:
                    raise ValueError('unexpected end of string')

            elif in_quotes:
                # Ignore everything inside the quotes.
                pass

            elif ptr[0] == '{':
                if parse_state not in (APS_START,
                                       APS_STRIDE_STARTED,
                                       APS_STRIDE_DELIMITED):
                    raise _UnexpectedCharacter(array_text, ptr)

                parse_state = APS_STRIDE_STARTED
                # Checked before dims[nest_level] is written below.
                if nest_level >= ARRAY_MAXDIM:
                    raise ValueError(
                        'number of array dimensions ({}) exceed the '
                        'maximum expected ({})'.format(
                            nest_level, ARRAY_MAXDIM))

                dims[nest_level] = 0
                nest_level += 1
                if ndims[0] < nest_level:
                    ndims[0] = nest_level

            elif ptr[0] == '}':
                # A '}' may follow an element or another '}'; directly
                # after '{' it is only accepted at the outermost level.
                if (parse_state not in (APS_ELEM_STARTED, APS_STRIDE_DONE) and
                        not (nest_level == 1 and
                             parse_state == APS_STRIDE_STARTED)):
                    raise _UnexpectedCharacter(array_text, ptr)

                parse_state = APS_STRIDE_DONE

                if nest_level == 0:
                    raise _UnexpectedCharacter(array_text, ptr)

                nest_level -= 1

                # Every sub-array at a given level must have the same
                # number of items as the one closed before it.
                if (prev_stride_len[nest_level] != 0 and
                        stride_len[nest_level] != prev_stride_len[nest_level]):
                    raise ValueError(
                        'inconsistent sub-array dimensions'
                        ' at position {}'.format(
                            ptr - array_text + 1))

                prev_stride_len[nest_level] = stride_len[nest_level]
                stride_len[nest_level] = 1
                if nest_level == 0:
                    end_of_array = end_of_item = True
                else:
                    dims[nest_level - 1] += 1

            elif ptr[0] == typdelim:
                if parse_state not in (APS_ELEM_STARTED, APS_STRIDE_DONE):
                    raise _UnexpectedCharacter(array_text, ptr)

                if parse_state == APS_STRIDE_DONE:
                    parse_state = APS_STRIDE_DELIMITED
                else:
                    parse_state = APS_ELEM_DELIMITED
                end_of_item = True
                stride_len[nest_level - 1] += 1

            elif not apg_ascii_isspace(ptr[0]):
                # Any other non-whitespace character is element content.
                if parse_state not in (APS_STRIDE_STARTED,
                                       APS_ELEM_STARTED,
                                       APS_ELEM_DELIMITED):
                    raise _UnexpectedCharacter(array_text, ptr)

                parse_state = APS_ELEM_STARTED
                array_is_empty = False

            if not end_of_item:
                ptr += 1

        if not array_is_empty:
            dims[ndims[0] - 1] += 1

        # Step over the character that ended the item.
        ptr += 1

    # only whitespace is allowed after the closing brace
    while ptr[0] != '\0':
        if not apg_ascii_isspace(ptr[0]):
            raise _UnexpectedCharacter(array_text, ptr)

        ptr += 1

    if array_is_empty:
        ndims[0] = 0
# Adapter exposing pgproto.uint4_encode with the encode_func_ex
# signature; `arg` is ignored.
cdef uint4_encode_ex(ConnectionSettings settings, WriteBuffer buf, object obj,
                     const void *arg):
    return pgproto.uint4_encode(settings, buf, obj)
# Adapter exposing pgproto.uint4_decode with the decode_func_ex
# signature; `arg` is ignored.
cdef uint4_decode_ex(ConnectionSettings settings, FRBuffer *buf,
                     const void *arg):
    return pgproto.uint4_decode(settings, buf)
# Encode `items` as a binary array with element type OIDOID, encoding
# each element with uint4_encode_ex.
cdef arrayoid_encode(ConnectionSettings settings, WriteBuffer buf, items):
    array_encode(settings, buf, items, OIDOID,
                 <encode_func_ex>&uint4_encode_ex, NULL)
# Decode a binary array from `buf`, decoding each element with
# uint4_decode_ex.
cdef arrayoid_decode(ConnectionSettings settings, FRBuffer *buf):
    return array_decode(settings, buf, <decode_func_ex>&uint4_decode_ex, NULL)
# Adapter exposing pgproto.text_encode with the encode_func_ex
# signature; `arg` is ignored.
cdef text_encode_ex(ConnectionSettings settings, WriteBuffer buf, object obj,
                    const void *arg):
    return pgproto.text_encode(settings, buf, obj)
# Adapter exposing pgproto.text_decode with the decode_func_ex
# signature; `arg` is ignored.
cdef text_decode_ex(ConnectionSettings settings, FRBuffer *buf,
                    const void *arg):
    return pgproto.text_decode(settings, buf)
# Encode `items` as a binary array with element type TEXTOID, encoding
# each element with text_encode_ex.
cdef arraytext_encode(ConnectionSettings settings, WriteBuffer buf, items):
    array_encode(settings, buf, items, TEXTOID,
                 <encode_func_ex>&text_encode_ex, NULL)
# Decode a binary array from `buf`, decoding each element with
# text_decode_ex.
cdef arraytext_decode(ConnectionSettings settings, FRBuffer *buf):
    return array_decode(settings, buf, <decode_func_ex>&text_decode_ex, NULL)
# Register the binary-format codecs for the _OIDOID and _TEXTOID types.
cdef init_array_codecs():
    # oid[] and text[] are registered as core codecs
    # to make type introspection query work
    #
    register_core_codec(_OIDOID,
                        <encode_func>&arrayoid_encode,
                        <decode_func>&arrayoid_decode,
                        PG_FORMAT_BINARY)

    register_core_codec(_TEXTOID,
                        <encode_func>&arraytext_encode,
                        <decode_func>&arraytext_decode,
                        PG_FORMAT_BINARY)
# Module-level call: the array codecs are registered when this code runs.
init_array_codecs()