├── 2023
├── Makefile
├── native
│   ├── __init__.py
│   ├── chunked_stream.h
│   ├── chunked_stream.pxd
│   ├── cpython.pxd
│   ├── mi_heap_destroy_stl_allocator.h
│   ├── mi_heap_destroy_stl_allocator.pxd
│   ├── mi_heap_destroy_stl_allocator.pyx
│   ├── numpy.pxd
│   ├── optional.pxd
│   ├── sorted_intersection.h
│   ├── string_view.pxd
│   ├── sum_repeated_with_step.h
│   └── utf8.pxd
└── pyx
│   ├── asyncpg_recordobj.h
│   ├── dag_accelerated.h
│   ├── dag_accelerated.pyx
│   ├── interval_intersections.h
│   ├── interval_intersections.pyx
│   ├── io.pyx
│   ├── object_arrays.pyx
│   ├── sentry_native.pyx
│   ├── sorted_ops.pyx
│   ├── sql_builders.h
│   ├── sql_builders.pyx
│   ├── types_accelerated.pyx
│   ├── unordered_unique.pyx
│   ├── utils_accelerated.h
│   ├── utils_accelerated.pyx
│   ├── web_model_io.h
│   └── web_model_io.pyx
├── LICENSE
├── README.md
├── async_utils.py
├── asyncpg_recordobj.h
├── to_object_arrays.pyx
└── typing_utils.py

/2023/Makefile:
--------------------------------------------------------------------------------
.PHONY: build-native
build-native:
	cmake -S athenian/api/sentry_native -B athenian/api/sentry_native/build -D SENTRY_BACKEND=crashpad -D SENTRY_BUILD_EXAMPLES=OFF -D SENTRY_BUILD_TESTS=OFF -D CMAKE_BUILD_TYPE=RelWithDebInfo
	cmake --build athenian/api/sentry_native/build --parallel
	cmake -S athenian/api/mimalloc -B athenian/api/mimalloc/build -D mi_cflags=-flto -D MI_BUILD_STATIC=OFF -D MI_BUILD_OBJECT=OFF -D MI_BUILD_TESTS=OFF -D MI_INSTALL_TOPLEVEL=ON -D MI_USE_CXX=OFF -D CMAKE_BUILD_TYPE=RelWithDebInfo
	cmake --build athenian/api/mimalloc/build --parallel

.PHONY: install-native
install-native: build-native
	sudo cmake --install athenian/api/sentry_native/build
	sudo cmake --install athenian/api/mimalloc/build

.PHONY: install-native-user
install-native-user: build-native
	cmake --install athenian/api/sentry_native/build
	cmake --install athenian/api/mimalloc/build

.PHONY: clean-native
clean-native:
	rm -rf athenian/api/sentry_native/build
	rm -rf athenian/api/mimalloc/build

--------------------------------------------------------------------------------
/2023/native/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/athenianco/athenian-api-open/810d2b894ddf1a2ea28d22e78d7eb34f52a0f32a/2023/native/__init__.py
--------------------------------------------------------------------------------
/2023/native/chunked_stream.h:
--------------------------------------------------------------------------------
#include <list>
#include <string.h>

#include "mi_heap_destroy_stl_allocator.h"

template <int chunk_size>
class chunked_stream {
 public:
  template <typename X>
  explicit chunked_stream(mi_heap_destroy_stl_allocator<X> &alloc): chunks_(alloc), pos_(0) {
    chunks_.emplace_back(alloc).reserve(chunk_size);
  }

  void write(const void *buffer, size_t size) {
    const char *input = reinterpret_cast<const char *>(buffer);
    int avail = chunk_size - pos_;
    while (size > static_cast<size_t>(avail)) {
      memcpy(chunks_.back().data() + pos_, input, avail);
      size -= avail;
      input += avail;
      pos_ = 0;
      chunks_.emplace_back(chunks_.get_allocator()).reserve(chunk_size);
      avail = chunk_size;
    }
    memcpy(chunks_.back().data() + pos_, input, size);
    pos_ += size;
  }

  size_t dump(char *output, size_t output_size) noexcept {
    size_t total_size = size();
    if (output_size > total_size) {
      output_size = total_size;
    }
    size_t left = output_size;
    auto it = chunks_.begin();
    while (left > chunk_size) {
      memcpy(output, it->data(), chunk_size);
      left -= chunk_size;
      output += chunk_size;
      it++;
    }
    if (left > 0) {
      memcpy(output, it->data(), left);
    }
    return output_size;
  }

  size_t size() const noexcept { return (chunks_.size() - 1) * chunk_size + pos_; }

 private:
  std::list<mi_vector<char>, mi_heap_destroy_stl_allocator<mi_vector<char>>> chunks_;
  int pos_;
};

--------------------------------------------------------------------------------
/2023/native/chunked_stream.pxd:
--------------------------------------------------------------------------------
from athenian.api.native.mi_heap_destroy_stl_allocator cimport mi_heap_destroy_stl_allocator


cdef extern from "chunked_stream.h" nogil:
    cdef cppclass chunked_stream[I=*]:
        chunked_stream chunked_stream[X](mi_heap_destroy_stl_allocator[X] &) except +
        void write(const void *buffer, size_t size)
        size_t dump(char *output, size_t output_size)
        size_t size()
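# How the buffer grows (illustrative sketch, with a hypothetical chunk_size of 8):
#   write(b"hello", 5)  -> chunk 0 = "hello", pos_ = 5
#   write(b"world!", 6) -> "wor" fills chunk 0, a new chunk receives "ld!", pos_ = 3
#   size()              -> (2 - 1) * 8 + 3 = 11
#   dump(out, 11)       -> copies whole chunks first, then the pos_-byte tail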
--------------------------------------------------------------------------------
/2023/native/cpython.pxd:
--------------------------------------------------------------------------------
from cpython cimport PyObject

ctypedef PyObject *PyObjectPtr

cdef extern from "structmember.h":
    ctypedef struct PyMemberDef:
        const char *name
        int type
        Py_ssize_t offset
        int flags
        const char *doc

cdef extern from "Python.h":
    ctypedef PyObject *(*allocfunc)(PyTypeObject *cls, Py_ssize_t nitems)

    ctypedef struct PyTypeObject:
        allocfunc tp_alloc
        PyMemberDef *tp_members

    bint PyObject_TypeCheck(PyObject *, PyTypeObject *) nogil
    PyTypeObject *Py_TYPE(const PyObject *) nogil

    bint PyLong_CheckExact(PyObject *) nogil
    long PyLong_AsLong(PyObject *) nogil

    double PyFloat_AS_DOUBLE(PyObject *) nogil
    bint PyFloat_CheckExact(PyObject *) nogil
    bint PyFloat_Check(PyObject *) nogil

    PyObject *PyList_New(Py_ssize_t len)
    bint PyList_CheckExact(PyObject *) nogil
    Py_ssize_t PyList_GET_SIZE(PyObject *) nogil
    PyObject *PyList_GET_ITEM(PyObject *, Py_ssize_t) nogil
    void PyList_SET_ITEM(PyObject *list, Py_ssize_t i, PyObject *o) nogil

    PyObject *PyTuple_GET_ITEM(PyObject *, Py_ssize_t) nogil

    bint PyDict_CheckExact(PyObject *) nogil
    int PyDict_Next(PyObject *p, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) nogil
    Py_ssize_t PyDict_Size(PyObject *p) nogil

    bint PyUnicode_Check(PyObject *) nogil
    Py_ssize_t PyUnicode_GET_LENGTH(PyObject *) nogil
    unsigned int PyUnicode_KIND(PyObject *) nogil
    void *PyUnicode_DATA(PyObject *) nogil
    Py_ssize_t PyUnicode_FindChar(
        PyObject *str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction
    ) nogil

    bint PyBytes_Check(PyObject *) nogil
    char *PyBytes_AS_STRING(PyObject *) nogil
    Py_ssize_t PyBytes_GET_SIZE(PyObject *) nogil

    bint PyByteArray_CheckExact(PyObject *) nogil
    char *PyByteArray_AS_STRING(PyObject *) nogil

    unsigned int PyUnicode_1BYTE_KIND
    unsigned int PyUnicode_2BYTE_KIND
    unsigned int PyUnicode_4BYTE_KIND

    PyObject *Py_None
    PyObject *Py_True
    PyObject *Py_False

    PyTypeObject PyLong_Type
    PyTypeObject PyFloat_Type
    PyTypeObject PyUnicode_Type
    PyTypeObject PyBool_Type
    PyTypeObject PyList_Type
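    # Usage sketch (these declarations back the nogil fast paths elsewhere in
    # this repo, e.g. the unicode probe in pyx/sql_builders.pyx):
    #
    #     cdef Py_ssize_t length = PyUnicode_GET_LENGTH(obj)
    #     cdef unsigned int kind = PyUnicode_KIND(obj)  # 1, 2 or 4 bytes per char
    #     cdef char *data = <char *> PyUnicode_DATA(obj)
    #     # scan length * kind bytes without holding the GIL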
    PyTypeObject PyDict_Type
    PyTypeObject PyBaseObject_Type

    void Py_INCREF(PyObject *)
    void Py_DECREF(PyObject *)

    object PyUnicode_FromStringAndSize(const char *, Py_ssize_t)
    str PyUnicode_FromKindAndData(unsigned int kind, void *buffer, Py_ssize_t size)
    str PyUnicode_FromString(const char *)
    object PyUnicode_New(Py_ssize_t, Py_UCS4)
    PyObject *PyBytes_FromStringAndSize(char *v, Py_ssize_t len)
    PyObject *PyLong_FromLong(long v)
    PyObject *PyObject_GetItem(PyObject *o, PyObject *key)
    PyObject *PyObject_GetAttr(PyObject *o, PyObject *attr_name)

cdef extern from "datetime.h" nogil:
    bint PyDateTime_Check(PyObject *)
    bint PyDelta_Check(PyObject *)

    int PyDateTime_GET_YEAR(PyObject *)
    int PyDateTime_GET_MONTH(PyObject *)
    int PyDateTime_GET_DAY(PyObject *)

    int PyDateTime_DATE_GET_HOUR(PyObject *)
    int PyDateTime_DATE_GET_MINUTE(PyObject *)
    int PyDateTime_DATE_GET_SECOND(PyObject *)

    int PyDateTime_DELTA_GET_DAYS(PyObject *)
    int PyDateTime_DELTA_GET_SECONDS(PyObject *)

    PyObject *PyDateTime_DATE_GET_TZINFO(PyObject *)

    ctypedef struct PyDateTime_CAPI:
        PyObject *TimeZone_UTC

--------------------------------------------------------------------------------
/2023/native/mi_heap_destroy_stl_allocator.h:
--------------------------------------------------------------------------------
#pragma once

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <mimalloc.h>

template<
    class T,
    class U,
    class HASH = std::hash<T>,
    class PRED = std::equal_to<T>
>
using mi_unordered_map = std::unordered_map<
    T, U, HASH, PRED, mi_heap_destroy_stl_allocator<std::pair<const T, U>>>;

template<
    class T,
    class HASH = std::hash<T>,
    class PRED = std::equal_to<T>
>
using mi_unordered_set = std::unordered_set<T, HASH, PRED, mi_heap_destroy_stl_allocator<T>>;

template <class T>
using mi_vector = std::vector<T, mi_heap_destroy_stl_allocator<T>>;

using mi_string = std::basic_string<char, std::char_traits<char>, mi_heap_destroy_stl_allocator<char>>;

namespace std {
  template<> struct hash<mi_string> {
    size_t operator()(const mi_string &s) const {
      return std::hash<std::string_view>()(std::string_view(s.data(), s.size()));
    }
  };
}

struct empty_deleter {
  template <typename T>
  void operator()(T *) const noexcept {}
};

--------------------------------------------------------------------------------
/2023/native/mi_heap_destroy_stl_allocator.pxd:
--------------------------------------------------------------------------------
# cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True
# cython: warn.maybe_uninitialized=True

from cpython.pycapsule cimport PyCapsule_GetPointer
from libcpp cimport bool
from libcpp.string cimport string
from libcpp.unordered_map cimport pair, unordered_map
from libcpp.unordered_set cimport unordered_set
from libcpp.vector cimport vector


cdef extern from "mi_heap_destroy_stl_allocator.h" nogil:
    cdef cppclass mi_heap_destroy_stl_allocator[T]:
        mi_heap_destroy_stl_allocator() except +
        mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator &)
        T* allocate(size_t count) except +
        void deallocate(T*)

    cdef cppclass empty_deleter:
        empty_deleter()

    cdef cppclass mi_unordered_map[T, U, HASH=*, PRED=*](unordered_map[T, U, HASH, PRED]):
        mi_unordered_map mi_unordered_map[X](mi_heap_destroy_stl_allocator[X]&) except +
        pair[mi_unordered_map.iterator, bool] try_emplace(...)
except + 25 | mi_heap_destroy_stl_allocator[T] get_allocator() 26 | 27 | cdef cppclass mi_unordered_set[T, HASH=*, PRED=*](unordered_set[T, HASH, PRED]): 28 | mi_unordered_set mi_unordered_set[X](mi_heap_destroy_stl_allocator[X]&) except + 29 | pair[mi_unordered_set.iterator, bool] emplace(...) except + 30 | mi_heap_destroy_stl_allocator[T] get_allocator() 31 | 32 | mi_unordered_set.iterator erase(mi_unordered_set.iterator) 33 | mi_unordered_set.iterator erase(mi_unordered_set.iterator, mi_unordered_set.iterator) 34 | size_t erase(T&) 35 | 36 | cdef cppclass mi_vector[T](vector[T]): 37 | mi_vector mi_vector[X](mi_heap_destroy_stl_allocator[X]&) except + 38 | T& emplace_back(...) except + 39 | mi_heap_destroy_stl_allocator[T] get_allocator() 40 | 41 | cdef cppclass mi_string(string): 42 | mi_string mi_string[X](const char *, size_t, mi_heap_destroy_stl_allocator[X]&) except + 43 | mi_heap_destroy_stl_allocator[char] get_allocator() 44 | 45 | 46 | cdef inline mi_heap_destroy_stl_allocator[char] *mi_heap_allocator_from_capsule(obj) except? NULL: 47 | return PyCapsule_GetPointer(obj, b"mi_heap_destroy_stl_allocator") 48 | 49 | 50 | cdef inline void _delete_mi_heap_allocator_in_capsule(obj): 51 | cdef mi_heap_destroy_stl_allocator[char] *alloc = mi_heap_allocator_from_capsule(obj) 52 | del alloc 53 | -------------------------------------------------------------------------------- /2023/native/mi_heap_destroy_stl_allocator.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -std=c++17 5 | # distutils: libraries = mimalloc 6 | # distutils: runtime_library_dirs = /usr/local/lib 7 | 8 | from cpython.pycapsule cimport PyCapsule_New 9 | 10 | 11 | def make_mi_heap_allocator_capsule() -> object: 12 | cdef mi_heap_destroy_stl_allocator[char] *alloc = new mi_heap_destroy_stl_allocator[char]() 13 | return PyCapsule_New(alloc, b"mi_heap_destroy_stl_allocator", _delete_mi_heap_allocator_in_capsule) 14 | -------------------------------------------------------------------------------- /2023/native/numpy.pxd: -------------------------------------------------------------------------------- 1 | from cpython cimport PyObject 2 | from numpy cimport dtype as npdtype, npy_int64, npy_intp 3 | 4 | from athenian.api.native.cpython cimport PyTypeObject 5 | 6 | 7 | cdef extern from "numpy/arrayobject.h": 8 | PyTypeObject PyArray_Type 9 | PyTypeObject PyDatetimeArrType_Type 10 | PyTypeObject PyDoubleArrType_Type 11 | PyTypeObject PyIntegerArrType_Type 12 | PyTypeObject PyFloatArrType_Type 13 | PyTypeObject PyTimedeltaArrType_Type 14 | 15 | enum: NPY_DATETIME_NAT 16 | 17 | ctypedef struct PyArray_Descr: 18 | char kind 19 | char type 20 | char byteorder 21 | char flags 22 | int type_num 23 | int itemsize "elsize" 24 | int alignment 25 | 26 | PyObject *PyArray_NewFromDescr( 27 | PyTypeObject *subtype, 28 | PyArray_Descr *descr, 29 | int nd, 30 | const npy_intp *dims, 31 | const npy_intp *strides, 32 | void *data, 33 | int flags, 34 | PyObject *obj, 35 | ) 36 | npdtype PyArray_DescrNew(npdtype) 37 | 38 | void *PyArray_DATA(PyObject *) nogil 39 | char *PyArray_BYTES(PyObject *) nogil 40 | npy_intp PyArray_DIM(PyObject *, size_t) nogil 41 | npy_intp PyArray_STRIDE(PyObject *, size_t) nogil 42 | int PyArray_NDIM(PyObject *) nogil 43 | npy_intp PyArray_ITEMSIZE(PyObject *) nogil 44 | 
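    # Note on the call sites (see pyx/object_arrays.pyx and pyx/sorted_ops.pyx):
    # PyArray_NewFromDescr steals a reference to its descriptor argument, so
    # every caller in this repo follows the call with Py_INCREF(dtype) to keep
    # the descriptor alive, e.g.:
    #
    #     arr = PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &length,
    #                                NULL, NULL, NPY_ARRAY_C_CONTIGUOUS, NULL)
    #     Py_INCREF(dtype)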
    bint PyArray_CheckExact(PyObject *) nogil
    PyArray_Descr *PyArray_DESCR(PyObject *) nogil
    int PyArray_TYPE(PyObject *) nogil
    bint PyArray_IS_C_CONTIGUOUS(PyObject *) nogil
    bint PyArray_IS_F_CONTIGUOUS(PyObject *) nogil
    void PyArray_ScalarAsCtype(PyObject *scalar, void *ctypeptr) nogil

    ctypedef enum NPY_DATETIMEUNIT:
        NPY_FR_ERROR = -1
        NPY_FR_M = 1
        NPY_FR_W = 2
        NPY_FR_D = 4
        NPY_FR_h = 5
        NPY_FR_m = 6
        NPY_FR_s = 7
        NPY_FR_ms = 8
        NPY_FR_us = 9
        NPY_FR_ns = 10
        NPY_FR_ps = 11
        NPY_FR_fs = 12
        NPY_FR_as = 13
        NPY_FR_GENERIC = 14

    ctypedef struct PyArray_DatetimeMetaData:
        NPY_DATETIMEUNIT base
        int num

    ctypedef struct PyDatetimeScalarObject:
        npy_int64 obval
        PyArray_DatetimeMetaData obmeta

--------------------------------------------------------------------------------
/2023/native/optional.pxd:
--------------------------------------------------------------------------------
# backported from Cython 3.0 alpha

from libcpp cimport bool


cdef extern from "<optional>" namespace "std" nogil:
    cdef cppclass nullopt_t:
        nullopt_t()

    cdef nullopt_t nullopt

    cdef cppclass optional[T]:
        ctypedef T value_type
        optional()
        optional(nullopt_t)
        optional(optional&) except +
        optional(T&) except +
        bool has_value()
        T& value()
        T& value_or[U](U& default_value)
        void swap(optional&)
        void reset()
        T& emplace(...)
        T& operator*()
        #T* operator->() # Not Supported
        optional& operator=(optional&)
        optional& operator=[U](U&)
        bool operator bool()
        bool operator!()
        bool operator==[U](optional&, U&)
        bool operator!=[U](optional&, U&)
        bool operator<[U](optional&, U&)
        bool operator>[U](optional&, U&)
        bool operator<=[U](optional&, U&)
        bool operator>=[U](optional&, U&)

    optional[T] make_optional[T](...) except +
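# Usage note (a sketch of the pattern used in pyx/object_arrays.pyx): optional
# lets Cython hold C++ values that are not default-constructible, constructing
# them in place with emplace() and reading them back with dereference():
#
#     cdef optional[mi_heap_destroy_stl_allocator[char]] alloc
#     cdef optional[chunked_stream] stream
#     alloc.emplace()                      # construct the allocator in place
#     stream.emplace(dereference(alloc))   # pass the allocator to the ctor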
--------------------------------------------------------------------------------
/2023/native/string_view.pxd:
--------------------------------------------------------------------------------
cdef extern from "<string_view>" namespace "std" nogil:
    cppclass string_view:
        string_view() except +
        string_view(const char *, size_t) except +
        const char *data()
        size_t size()

--------------------------------------------------------------------------------
/2023/native/sum_repeated_with_step.h:
--------------------------------------------------------------------------------
#pragma once

#include <stdint.h>
#include <immintrin.h>

#define restrict __restrict__

void sum_repeated_with_step_avx2(
    const int64_t * restrict src,
    int64_t src_len,
    int64_t repeats,
    int64_t step,
    int64_t * restrict dst
) {
    __m256i offset = _mm256_set1_epi64x(0);
    const __m256i step_vec = _mm256_set1_epi64x(step);
    for (int64_t i = 0; i < repeats; i++) {
        int64_t j;
        for (j = 0; j < src_len - 3; j += 4) {
            _mm256_storeu_si256(
                reinterpret_cast<__m256i *>(dst),
                _mm256_add_epi64(
                    _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + j)),
                    offset
                )
            );
            dst += 4;
        }
        for (; j < src_len; j++) {
            *dst++ = src[j] + step * i;
        }
        offset = _mm256_add_epi64(offset, step_vec);
    }
}
--------------------------------------------------------------------------------
/2023/native/utf8.pxd:
--------------------------------------------------------------------------------
from libc.stdint cimport uint32_t


# Adapted from CPython, licensed under PSF2 (BSD-like)
cdef inline int ucs4_to_utf8_json(uint32_t ucs4, char *utf8) nogil:
    if ucs4 == 0:
        return 0
    if ucs4 == b"\\" or ucs4 == b'"':
        utf8[0] = b"\\"
        utf8[1] = ucs4
        return 2
    if ucs4 < 0x20:
        # Escape control chars
        utf8[0] = b"\\"
        utf8[1] = b"u"
        utf8[2] = b"0"
        utf8[3] = b"0"
        utf8[4] = b"0" if ucs4 < 0x10 else b"1"
        ucs4 &= 0x0F
        if ucs4 > 0x09:
            utf8[5] = (ucs4 - 0x0A) + ord(b"A")
        else:
            utf8[5] = ucs4 + ord(b"0")
        return 6
    if ucs4 < 0x80:
        # Encode ASCII
        utf8[0] = ucs4
        return 1
    if ucs4 < 0x0800:
        # Encode Latin-1
        utf8[0] = 0xc0 | (ucs4 >> 6)
        utf8[1] = 0x80 | (ucs4 & 0x3f)
        return 2
    if 0xD800 <= ucs4 <= 0xDFFF:
        return 0
    if ucs4 < 0x10000:
        utf8[0] = 0xe0 | (ucs4 >> 12)
        utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f)
        utf8[2] = 0x80 | (ucs4 & 0x3f)
        return 3
    # Encode UCS4 Unicode ordinals
    utf8[0] = 0xf0 | (ucs4 >> 18)
    utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f)
    utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f)
    utf8[3] = 0x80 | (ucs4 & 0x3f)
    return 4
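# Worked examples (each follows from one branch above; the return value is the
# number of bytes written, with 0 meaning "cannot encode"):
#   ucs4_to_utf8_json(ord('"'), buf)  -> writes b'\\"'      and returns 2
#   ucs4_to_utf8_json(0x1F, buf)      -> writes b'\\u001F'  and returns 6
#   ucs4_to_utf8_json(0xE9, buf)      -> writes b'\xc3\xa9' and returns 2 ("é")
#   ucs4_to_utf8_json(0xD800, buf)    -> returns 0 (lone surrogate is rejected,
#                                        as is NUL in the first branch)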
--------------------------------------------------------------------------------
/2023/pyx/asyncpg_recordobj.h:
--------------------------------------------------------------------------------
// no need to #include anything, this file is used internally by to_object_arrays.pyx

typedef struct {
    PyObject_VAR_HEAD

    // asyncpg specifics begin here
    // if they add another field, we will break spectacularly
    Py_hash_t self_hash;
    PyObject *desc;       // we don't care of the actual type
    PyObject *ob_item[1]; // embedded in the tail, the count matches len()
} ApgRecordObject;

#define ApgRecord_GET_ITEM(op, i) (((ApgRecordObject *)(op))->ob_item[i])
#define ApgRecord_SET_ITEM(op, i, v) (((ApgRecordObject *)(op))->ob_item[i] = v)
#define ApgRecord_GET_DESC(op) (((ApgRecordObject *)(op))->desc)

--------------------------------------------------------------------------------
/2023/pyx/dag_accelerated.h:
--------------------------------------------------------------------------------
#pragma once
#include <stdint.h>
#include <immintrin.h>

size_t sorted_set_difference_avx2(
        const uint32_t *__restrict__ set1,
        const size_t length1,
        const uint32_t *__restrict__ set2,
        const size_t length2,
        uint32_t *__restrict__ out) {
    __m256i left, right;
    const __m256i ones = _mm256_set1_epi8(0xff);
    size_t passed = 0;
    const uint32_t *border_left = set1 + length1 - 8;
    const uint32_t *border_right = set2 + length2 - 8;
    while (set1 <= border_left && set2 <= border_right) {
        left = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(set1));
        right = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(set2));
        __m256i c = _mm256_cmpeq_epi32(left, right);
        if (_mm256_testc_si256(c, ones)) {
            set1 += 8;
            set2 += 8;
            continue;
        }
        int offset = __builtin_ctz(~static_cast<uint32_t>(_mm256_movemask_epi8(c))) >> 2;
        set1 += offset;
        set2 += offset;
        if (*set1 < *set2) {
            out[passed++] = *set1++;
        } else {
            set2++;
        }
    }
    border_left += 8;
    border_right += 8;
    while (set1 < border_left && set2 < border_right) {
        uint32_t ileft = *set1;
        uint32_t iright = *set2;
        if (ileft == iright) {
            set1++;
            set2++;
        } else if (ileft < iright) {
            out[passed++] = ileft;
            set1++;
        } else {
            set2++;
        }
    }
    if (set2 == border_right) {
        while (set1 < border_left) {
            out[passed++] = *set1++;
        }
    }
    return passed;
}
--------------------------------------------------------------------------------
/2023/pyx/interval_intersections.h:
--------------------------------------------------------------------------------
#include <algorithm>

template <typename B, typename I>
void argsort_bodies(const mi_vector<B> &bodies, mi_vector<I> &indexes) noexcept {
  std::sort(indexes.begin(), indexes.end(), [&bodies](I left, I right) -> bool {
    return bodies[left] < bodies[right];
  });
}
--------------------------------------------------------------------------------
/2023/pyx/interval_intersections.pyx:
--------------------------------------------------------------------------------
# cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True
# cython: warn.maybe_uninitialized=True
# distutils: language = c++
# distutils: extra_compile_args = -std=c++17
# distutils: libraries = mimalloc
# distutils: runtime_library_dirs = /usr/local/lib

cimport cython
from cython.operator cimport dereference as deref, postincrement
from libc.stdint cimport int64_t, uint64_t
from libcpp cimport bool
from libcpp.set cimport set
from libcpp.utility cimport move

from athenian.api.native.mi_heap_destroy_stl_allocator cimport (
    mi_heap_allocator_from_capsule,
    mi_heap_destroy_stl_allocator,
    mi_string,
    mi_unordered_map,
    mi_vector,
)
from athenian.api.native.optional cimport optional

import numpy as np


# __builtin_clzl is a compiler built-in that counts the number of leading zeros
cdef extern int __builtin_clzl(unsigned long)

cdef inline int _leading_zero_bits(unsigned long n):
    if n == 0:
        # __builtin_clzl is undefined when called with 0
        return sizeof(n) * 8
return __builtin_clzl(n) 35 | 36 | 37 | def calculate_interval_intersections(starts: np.ndarray, 38 | finishes: np.ndarray, 39 | borders: np.ndarray, 40 | ) -> np.ndarray: 41 | cdef unsigned long max_intervals, groups_count, time_offset 42 | assert len(starts) == len(finishes) 43 | assert starts.dtype == np.uint64 44 | assert finishes.dtype == np.uint64 45 | if len(starts) == 0: 46 | return np.array([], dtype=float) 47 | time_offset = starts.min() 48 | # require less bits for the timestamps 49 | starts -= time_offset 50 | finishes -= time_offset 51 | # there can be intervals of zero length, make them 1-second 52 | finishes[starts >= finishes] += 1 53 | group_lengths = np.diff(borders, prepend=0) 54 | max_intervals = group_lengths.max() 55 | series = np.arange(max_intervals, dtype=np.uint64) 56 | intervals = np.concatenate([starts, finishes]) 57 | size = len(starts) 58 | time_offset = 64 - _leading_zero_bits(max_intervals - 1) 59 | intervals <<= time_offset 60 | groups_count = len(borders) 61 | group_offset = _leading_zero_bits(groups_count - 1) 62 | group_indexes = np.repeat(np.arange(groups_count, dtype=np.uint64), group_lengths) 63 | 64 | intervals[:size] |= group_indexes << group_offset 65 | intervals[size:] |= group_indexes << group_offset 66 | # https://codereview.stackexchange.com/questions/83018/vectorized-numpy-version-of-arange-with-multiple-start-stop 67 | indexes = ( 68 | np.repeat(group_lengths - group_lengths.cumsum(), group_lengths) + 69 | np.arange(group_lengths.sum()) 70 | ).view(np.uint64) 71 | intervals[:size] |= indexes 72 | intervals[size:] |= indexes 73 | # bits 0..time_offset - interval indexes, each in range 0..group length 74 | # bits time_offset..(64 - group_offset) - timestamps 75 | # bits (64 - group_offset)..63 - group index 76 | # stable sort because starts must come before finishes if the timestamps are equal 77 | intervals = np.sort(intervals, kind="stable") 78 | # remove the group indexes 79 | intervals &= (1 << group_offset) - 1 80 | raw = np.zeros(size, dtype=np.uint64) 81 | cdef: 82 | const uint64_t[:] intervals_view = intervals 83 | const int64_t[:] borders_view = borders * 2 84 | uint64_t[:] raw_view = raw 85 | with nogil: 86 | _calculate_interval_intersections(intervals_view, borders_view, time_offset, raw_view) 87 | result = raw.astype(float) / (finishes - starts) 88 | return result 89 | 90 | 91 | @cython.boundscheck(False) 92 | @cython.wraparound(False) 93 | cdef void _calculate_interval_intersections(const uint64_t[:] intervals, 94 | const int64_t[:] borders, 95 | char time_offset, 96 | uint64_t[:] intersections) nogil: 97 | cdef: 98 | int64_t i, j, border_index, group_start, group_finish, ii_open, intersections_offset 99 | uint64_t item, index_mask, timestamp, previous_timestamp, delta 100 | # set faster than unordered_set because we iterate over all elements on each step 101 | set[int64_t] open_intervals 102 | set[int64_t].iterator ii 103 | index_mask = (1 << time_offset) - 1 104 | previous_timestamp = 0 # not really needed but removes the warning 105 | for border_index in range(len(borders)): 106 | group_start = borders[border_index - 1] if border_index > 0 else 0 107 | intersections_offset = group_start >> 1 108 | group_finish = borders[border_index] 109 | for i in range(group_start, group_finish): 110 | item = intervals[i] 111 | timestamp = item >> time_offset 112 | delta = (timestamp - previous_timestamp) * open_intervals.size() 113 | for ii_open in open_intervals: 114 | intersections[intersections_offset + ii_open] += delta 115 | 
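            # Worked example of this sweep (illustrative): intervals A=[0, 10)
            # and B=[5, 10) in one group give the sorted event stream
            # open A @0, open B @5, close @10, close @10.
            # @5: delta = (5 - 0) * 1 open interval -> A += 5.
            # @10 (first close): delta = (10 - 5) * 2 open -> A += 10, B += 10.
            # Totals A=15 and B=10, divided by the lengths 10 and 5 in the
            # caller, yield 1.5 and 2.0: the average number of concurrently
            # running intervals (self included) over each interval's lifetime.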
interval_index = item & index_mask 116 | ii = open_intervals.find(interval_index) 117 | if ii == open_intervals.end(): 118 | open_intervals.insert(interval_index) 119 | else: 120 | open_intervals.erase(ii) 121 | previous_timestamp = timestamp 122 | open_intervals.clear() 123 | 124 | -------------------------------------------------------------------------------- /2023/pyx/io.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -std=c++17 5 | 6 | import pickle 7 | from typing import Any 8 | 9 | from cpython cimport Py_INCREF, PyBytes_FromStringAndSize, PyObject, PyTuple_New, PyTuple_SET_ITEM 10 | from libc.stdint cimport uint32_t 11 | from libc.string cimport memcpy 12 | 13 | from athenian.api.native.cpython cimport PyBytes_AS_STRING 14 | 15 | from medvedi import DataFrame 16 | 17 | 18 | cdef extern from "" nogil: 19 | char *gcvt(double number, int ndigit, char *buf) 20 | 21 | 22 | cdef extern from "" nogil: 23 | size_t strnlen(const char *, size_t) 24 | 25 | 26 | def serialize_args(tuple args, alloc_capsule=None) -> bytes: 27 | cdef: 28 | bytes result, buffer 29 | Py_ssize_t size = 4 30 | list buffers = [] 31 | char *output 32 | bint is_df 33 | 34 | for arg in args: 35 | if isinstance(arg, DataFrame): 36 | is_df = True 37 | buffer = arg.serialize_unsafe() 38 | else: 39 | is_df = False 40 | buffer = pickle.dumps(arg) 41 | size += len(buffer) + 5 42 | buffers.append((is_df, buffer)) 43 | result = PyBytes_FromStringAndSize(NULL, size) 44 | output = PyBytes_AS_STRING( result) 45 | ( output)[0] = len(buffers) 46 | output += 4 47 | for is_df, buffer in buffers: 48 | output[0] = is_df 49 | output += 1 50 | size = len(buffer) 51 | ( output)[0] = size 52 | output += 4 53 | memcpy(output, PyBytes_AS_STRING( buffer), size) 54 | output += size 55 | return result 56 | 57 | 58 | def deserialize_args(bytes buffer) -> tuple[Any]: 59 | cdef: 60 | uint32_t size, i 61 | tuple result 62 | long offset = 4 63 | object item 64 | char is_df 65 | 66 | input = PyBytes_AS_STRING( buffer) 67 | size = ( input)[0] 68 | input += 4 69 | result = PyTuple_New(size) 70 | for i in range(size): 71 | is_df = input[0] 72 | input += 1 73 | size = ( input)[0] 74 | input += 4 75 | offset += 5 76 | if is_df: 77 | item = DataFrame.deserialize_unsafe(buffer[offset: offset + size]) 78 | else: 79 | item = pickle.loads(buffer[offset: offset + size]) 80 | offset += size 81 | input += size 82 | Py_INCREF(item) 83 | PyTuple_SET_ITEM(result, i, item) 84 | return result 85 | -------------------------------------------------------------------------------- /2023/pyx/object_arrays.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: libraries = mimalloc 5 | # distutils: runtime_library_dirs = /usr/local/lib 6 | # distutils: extra_compile_args = -std=c++17 -mavx2 -ftree-vectorize 7 | 8 | cimport cython 9 | from cpython cimport Py_INCREF, PyObject 10 | from cpython.bytearray cimport PyByteArray_AS_STRING, PyByteArray_Check 11 | from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_Check 12 | from cpython.memoryview cimport PyMemoryView_Check, PyMemoryView_GET_BUFFER 13 | from 
cpython.unicode cimport PyUnicode_Check 14 | from cython.operator cimport dereference 15 | from libc.stdint cimport int32_t, int64_t 16 | from libc.string cimport memcpy, memset 17 | from numpy cimport ( 18 | NPY_ARRAY_C_CONTIGUOUS, 19 | NPY_OBJECT, 20 | PyArray_CheckExact, 21 | PyArray_DATA, 22 | PyArray_Descr, 23 | PyArray_DescrFromType, 24 | PyArray_DIM, 25 | PyArray_GETCONTIGUOUS, 26 | PyArray_ISOBJECT, 27 | PyArray_ISSTRING, 28 | PyArray_NDIM, 29 | PyArray_SetBaseObject, 30 | dtype as npdtype, 31 | import_array, 32 | ndarray, 33 | npy_bool, 34 | npy_intp, 35 | ) 36 | 37 | from athenian.api.native.chunked_stream cimport chunked_stream 38 | from athenian.api.native.cpython cimport ( 39 | Py_None, 40 | Py_True, 41 | PyBytes_GET_SIZE, 42 | PyList_CheckExact, 43 | PyList_GET_ITEM, 44 | PyList_GET_SIZE, 45 | PyTuple_GET_ITEM, 46 | PyTypeObject, 47 | PyUnicode_DATA, 48 | PyUnicode_GET_LENGTH, 49 | PyUnicode_KIND, 50 | ) 51 | from athenian.api.native.mi_heap_destroy_stl_allocator cimport mi_heap_destroy_stl_allocator 52 | from athenian.api.native.numpy cimport ( 53 | PyArray_DESCR, 54 | PyArray_DescrNew, 55 | PyArray_NewFromDescr, 56 | PyArray_Type, 57 | ) 58 | from athenian.api.native.optional cimport optional 59 | 60 | import asyncpg 61 | import numpy as np 62 | 63 | import_array() 64 | 65 | 66 | cdef extern from "asyncpg_recordobj.h": 67 | PyObject *ApgRecord_GET_ITEM(PyObject *, int) 68 | 69 | 70 | @cython.boundscheck(False) 71 | def to_object_arrays(list rows not None, int columns) -> np.ndarray: 72 | """ 73 | Convert a list of tuples or asyncpg.Record-s into an object array. Any subclass of 74 | tuple in `rows` will be casted to tuple. 75 | 76 | Parameters 77 | ---------- 78 | rows: 2-d array (N, K) 79 | list of tuples to be converted into an array. Each tuple must be of equal length, 80 | otherwise, the results are undefined. 81 | columns: number of columns in each row. 
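    For example, ``to_object_arrays([(1, "a"), (2, "b")], 2)`` produces an
    object array of shape (2, 2) whose element [j, i] holds field j of row i,
    i.e. the first output row is [1, 2] and the second is ["a", "b"].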
82 | 83 | Returns 84 | ------- 85 | np.ndarray[object, ndim=2] 86 | """ 87 | cdef: 88 | Py_ssize_t i, j, size 89 | ndarray[object, ndim=2] result 90 | PyObject *record 91 | 92 | size = len(rows) 93 | 94 | result = np.empty((columns, size), dtype=object) 95 | if size == 0: 96 | return result 97 | 98 | if isinstance(rows[0], asyncpg.Record): 99 | for i in range(size): 100 | record = PyList_GET_ITEM(rows, i) 101 | for j in range(columns): 102 | result[j, i] = ApgRecord_GET_ITEM(record, j) 103 | elif isinstance(rows[0], tuple): 104 | for i in range(size): 105 | record = PyList_GET_ITEM(rows, i) 106 | for j in range(columns): 107 | result[j, i] = PyTuple_GET_ITEM(record, j) 108 | else: 109 | # convert to tuple 110 | for i in range(size): 111 | row = tuple(rows[i]) 112 | for j in range(columns): 113 | result[j, i] = row[j] 114 | 115 | return result 116 | 117 | 118 | def as_bool(ndarray arr not None) -> np.ndarray: 119 | if arr.dtype == bool: 120 | return arr 121 | assert arr.dtype == object 122 | assert arr.ndim == 1 123 | new_arr = np.empty(len(arr), dtype=bool) 124 | cdef: 125 | const char *arr_obj = PyArray_DATA(arr) 126 | long size = len(arr), stride = arr.strides[0] 127 | npy_bool *out_bools = PyArray_DATA(new_arr) 128 | with nogil: 129 | _as_bool_vec(arr_obj, stride, size, out_bools) 130 | return new_arr 131 | 132 | 133 | @cython.boundscheck(False) 134 | @cython.wraparound(False) 135 | cdef void _as_bool_vec(const char *obj_arr, 136 | const long stride, 137 | const long size, 138 | npy_bool *out_arr) nogil: 139 | cdef long i 140 | for i in range(size): 141 | # Py_None and Py_False become 0 142 | out_arr[i] = Py_True == ( (obj_arr + i * stride))[0] 143 | 144 | 145 | 146 | def is_null(ndarray arr not None) -> np.ndarray: 147 | if arr.dtype != object: 148 | return np.zeros(len(arr), dtype=bool) 149 | assert arr.ndim == 1 150 | new_arr = np.zeros(len(arr), dtype=bool) 151 | cdef: 152 | const char *arr_obj = PyArray_DATA(arr) 153 | long size = len(arr), stride = arr.strides[0] 154 | npy_bool *out_bools = PyArray_DATA(new_arr) 155 | with nogil: 156 | _is_null_vec(arr_obj, stride, size, out_bools) 157 | return new_arr 158 | 159 | 160 | @cython.boundscheck(False) 161 | @cython.wraparound(False) 162 | cdef void _is_null_vec(const char *obj_arr, 163 | const long stride, 164 | const long size, 165 | npy_bool *out_arr) nogil: 166 | cdef long i 167 | for i in range(size): 168 | out_arr[i] = Py_None == ( (obj_arr + i * stride))[0] 169 | 170 | 171 | def is_not_null(ndarray arr not None) -> np.ndarray: 172 | if arr.dtype != object: 173 | return np.ones(len(arr), dtype=bool) 174 | assert arr.ndim == 1 175 | new_arr = np.zeros(len(arr), dtype=bool) 176 | cdef: 177 | const char *arr_obj = PyArray_DATA(arr) 178 | long size = len(arr), stride = arr.strides[0] 179 | npy_bool *out_bools = PyArray_DATA(new_arr) 180 | with nogil: 181 | _is_not_null(arr_obj, stride, size, out_bools) 182 | return new_arr 183 | 184 | 185 | @cython.boundscheck(False) 186 | @cython.wraparound(False) 187 | cdef void _is_not_null(const char *obj_arr, 188 | const long stride, 189 | const long size, 190 | npy_bool *out_arr) nogil: 191 | cdef long i 192 | for i in range(size): 193 | out_arr[i] = Py_None != ( (obj_arr + i * stride))[0] 194 | 195 | 196 | def nested_lengths(arr not None, output=None) -> np.ndarray: 197 | cdef: 198 | long size 199 | bint is_array = PyArray_CheckExact(arr) 200 | ndarray result 201 | 202 | if is_array: 203 | assert PyArray_ISOBJECT(arr) or PyArray_ISSTRING(arr) 204 | assert PyArray_NDIM(arr) == 1 205 | size = 
PyArray_DIM(arr, 0) 206 | else: 207 | assert PyList_CheckExact( arr) 208 | size = PyList_GET_SIZE( arr) 209 | 210 | if output is None: 211 | result = np.zeros(size, dtype=int) 212 | else: 213 | assert PyArray_CheckExact(output) 214 | assert output.dtype == int 215 | result = output 216 | if size == 0: 217 | return result 218 | if is_array: 219 | return _nested_lengths_arr(arr, size, result) 220 | return _nested_lengths_list( arr, size, result) 221 | 222 | 223 | cdef ndarray _nested_lengths_arr(ndarray arr, long size, ndarray result): 224 | cdef: 225 | PyObject **elements = PyArray_DATA(arr) 226 | PyObject *element 227 | long i 228 | long *result_data 229 | 230 | if PyArray_ISSTRING(arr): 231 | return np.char.str_len(arr) 232 | 233 | result_data = PyArray_DATA(result) 234 | element = elements[0] 235 | if PyArray_CheckExact( element): 236 | for i in range(size): 237 | result_data[i] = PyArray_DIM( elements[i], 0) 238 | elif PyList_CheckExact(element): 239 | for i in range(size): 240 | result_data[i] = PyList_GET_SIZE(elements[i]) 241 | elif PyUnicode_Check( element): 242 | for i in range(size): 243 | result_data[i] = PyUnicode_GET_LENGTH(elements[i]) 244 | elif PyBytes_Check( element): 245 | for i in range(size): 246 | result_data[i] = PyBytes_GET_SIZE(elements[i]) 247 | else: 248 | raise AssertionError(f"Unsupported nested type: {type( element).__name__}") 249 | return result 250 | 251 | 252 | cdef ndarray _nested_lengths_list(PyObject *arr, long size, ndarray result): 253 | cdef: 254 | PyObject *element 255 | long i 256 | long *result_data 257 | 258 | result_data = PyArray_DATA(result) 259 | element = PyList_GET_ITEM(arr, 0) 260 | if PyArray_CheckExact( element): 261 | for i in range(size): 262 | result_data[i] = PyArray_DIM( PyList_GET_ITEM(arr, i), 0) 263 | elif PyList_CheckExact(element): 264 | for i in range(size): 265 | result_data[i] = PyList_GET_SIZE(PyList_GET_ITEM(arr, i)) 266 | elif PyUnicode_Check( element): 267 | for i in range(size): 268 | result_data[i] = PyUnicode_GET_LENGTH(PyList_GET_ITEM(arr, i)) 269 | elif PyBytes_Check( element): 270 | for i in range(size): 271 | result_data[i] = PyBytes_GET_SIZE(PyList_GET_ITEM(arr, i)) 272 | else: 273 | raise AssertionError(f"Unsupported nested type: {type( element).__name__}") 274 | return result 275 | 276 | 277 | def array_from_buffer(buffer not None, npdtype dtype, npy_intp count, npy_intp offset=0) -> ndarray: 278 | cdef: 279 | void *data 280 | ndarray arr 281 | if PyBytes_Check(buffer): 282 | data = PyBytes_AS_STRING(buffer) + offset 283 | elif PyByteArray_Check(buffer): 284 | data = PyByteArray_AS_STRING(buffer) + offset 285 | elif PyMemoryView_Check(buffer): 286 | data = (PyMemoryView_GET_BUFFER(buffer).buf) + offset 287 | else: 288 | raise ValueError(f"Unsupported buffer type: {type(buffer).__name__}") 289 | Py_INCREF(dtype) 290 | Py_INCREF(buffer) 291 | arr = PyArray_NewFromDescr( 292 | &PyArray_Type, 293 | dtype, 294 | 1, 295 | &count, 296 | NULL, 297 | data, 298 | NPY_ARRAY_C_CONTIGUOUS, 299 | NULL, 300 | ) 301 | PyArray_SetBaseObject(arr, buffer) 302 | return arr 303 | 304 | 305 | def array_of_objects(int length, fill_value) -> ndarray: 306 | cdef: 307 | ndarray arr 308 | npdtype objdtype = PyArray_DescrNew(PyArray_DescrFromType(NPY_OBJECT)) 309 | npy_intp nplength = length, i 310 | PyObject **data 311 | PyObject *obj = fill_value 312 | 313 | arr = PyArray_NewFromDescr( 314 | &PyArray_Type, 315 | objdtype, 316 | 1, 317 | &nplength, 318 | NULL, 319 | NULL, 320 | NPY_ARRAY_C_CONTIGUOUS, 321 | NULL, 322 | ) 323 | 
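    # PyArray_NewFromDescr above stole a reference to objdtype; the Py_INCREF
    # below compensates. The fill loop then writes the same PyObject pointer
    # into every slot and bumps its refcount once by nplength, which is the
    # bulk equivalent of calling Py_INCREF(fill_value) per element.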
Py_INCREF(objdtype) 324 | data = PyArray_DATA(arr) 325 | for i in range(nplength): 326 | data[i] = obj 327 | obj.ob_refcnt += nplength 328 | return arr 329 | 330 | 331 | def vectorize_numpy_struct_scalar_field(cls, structs, npdtype dtype, long offset) -> np.ndarray: 332 | cdef: 333 | ndarray arr 334 | npy_intp length = len(structs), i = 0 335 | npy_intp itemsize = dtype.itemsize 336 | char *arr_data 337 | Py_ssize_t struct_data_offset = ( cls).tp_members[1].offset 338 | PyObject *struct_data_obj 339 | char *struct_data 340 | arr = PyArray_NewFromDescr( 341 | &PyArray_Type, 342 | dtype, 343 | 1, 344 | &length, 345 | NULL, 346 | NULL, 347 | NPY_ARRAY_C_CONTIGUOUS, 348 | NULL, 349 | ) 350 | Py_INCREF(dtype) 351 | arr_data = PyArray_DATA(arr) 352 | 353 | for struct in structs: 354 | struct_data_obj = dereference( 355 | (( struct) + struct_data_offset) 356 | ) 357 | if PyBytes_Check( struct_data_obj): 358 | struct_data = PyBytes_AS_STRING( struct_data_obj) 359 | elif PyMemoryView_Check( struct_data_obj): 360 | struct_data = PyMemoryView_GET_BUFFER( struct_data_obj).buf 361 | elif PyByteArray_Check( struct_data_obj): 362 | struct_data = PyByteArray_AS_STRING( struct_data_obj) 363 | else: 364 | raise AssertionError(f"unsupported buffer type: {type(struct.data)}") 365 | memcpy(arr_data + i * itemsize, struct_data + offset, itemsize) 366 | i += 1 367 | return arr 368 | 369 | 370 | def vectorize_numpy_struct_array_field( 371 | cls, 372 | structs, 373 | npdtype dtype, 374 | long offset, 375 | ) -> tuple[np.ndarray, np.ndarray]: 376 | cdef: 377 | ndarray arr, arr_offsets 378 | npy_intp length = len(structs) + 1, i = 0 379 | npy_intp itemsize = dtype.itemsize 380 | char *arr_data 381 | Py_ssize_t struct_data_offset = ( cls).tp_members[1].offset 382 | PyObject *struct_data_obj 383 | char *struct_data 384 | int32_t field_offset, field_count 385 | int32_t *field 386 | npdtype int_dtype = npdtype(int) 387 | int64_t pos = 0, delta 388 | optional[chunked_stream] dump 389 | optional[mi_heap_destroy_stl_allocator[char]] alloc 390 | 391 | alloc.emplace() 392 | dump.emplace(dereference(alloc)) 393 | arr_offsets = PyArray_NewFromDescr( 394 | &PyArray_Type, 395 | int_dtype, 396 | 1, 397 | &length, 398 | NULL, 399 | NULL, 400 | NPY_ARRAY_C_CONTIGUOUS, 401 | NULL, 402 | ) 403 | Py_INCREF(int_dtype) 404 | offsets_data = PyArray_DATA(arr_offsets) 405 | 406 | length = 0 407 | for struct in structs: 408 | struct_data_obj = dereference( 409 | (( struct) + struct_data_offset) 410 | ) 411 | if PyBytes_Check( struct_data_obj): 412 | struct_data = PyBytes_AS_STRING( struct_data_obj) 413 | elif PyMemoryView_Check( struct_data_obj): 414 | struct_data = PyMemoryView_GET_BUFFER( struct_data_obj).buf 415 | elif PyByteArray_Check( struct_data_obj): 416 | struct_data = PyByteArray_AS_STRING( struct_data_obj) 417 | else: 418 | raise AssertionError(f"unsupported buffer type: {type(struct.data)}") 419 | field = (struct_data + offset) 420 | field_offset = field[0] 421 | field_count = field[1] 422 | offsets_data[i] = length 423 | length += field_count 424 | delta = itemsize * field_count 425 | dereference(dump).write(struct_data + field_offset, delta) 426 | pos += delta 427 | i += 1 428 | offsets_data[i] = length 429 | 430 | arr = PyArray_NewFromDescr( 431 | &PyArray_Type, 432 | dtype, 433 | 1, 434 | &length, 435 | NULL, 436 | NULL, 437 | NPY_ARRAY_C_CONTIGUOUS, 438 | NULL, 439 | ) 440 | Py_INCREF(dtype) 441 | dereference(dump).dump(PyArray_DATA(arr), pos) 442 | 443 | return arr, arr_offsets 444 | 445 | 446 | def 
objects_to_pyunicode_bytes(ndarray arr not None, char_limit=None) -> ndarray: 447 | assert PyArray_NDIM(arr) == 1 448 | assert PyArray_DESCR( arr).kind == b"O" 449 | 450 | cdef npy_intp length = PyArray_DIM(arr, 0) 451 | 452 | if length == 0: 453 | return np.array([], dtype="S") 454 | 455 | arr = PyArray_GETCONTIGUOUS(arr) 456 | 457 | cdef: 458 | npy_intp i, max_itemsize = 0 459 | PyObject **data = PyArray_DATA(arr) 460 | PyObject *obj 461 | Py_ssize_t i_itemsize, char_limit_native 462 | npdtype dtype 463 | ndarray converted 464 | char *converted_data 465 | char *head 466 | 467 | if char_limit is None: 468 | for i in range(length): 469 | obj = data[i] 470 | if obj == Py_None: 471 | i_itemsize = 4 472 | else: 473 | assert PyUnicode_Check( obj), f"arr[{i}]: {arr[i]}" 474 | i_itemsize = PyUnicode_GET_LENGTH(obj) * PyUnicode_KIND(obj) 475 | if i_itemsize > max_itemsize: 476 | max_itemsize = i_itemsize 477 | else: 478 | assert char_limit > 0 479 | char_limit_native = char_limit 480 | for i in range(length): 481 | obj = data[i] 482 | if obj == Py_None: 483 | i_itemsize = 4 484 | else: 485 | assert PyUnicode_Check( obj), f"arr[{i}]: {arr[i]}" 486 | i_itemsize = PyUnicode_GET_LENGTH(obj) * PyUnicode_KIND(obj) 487 | if i_itemsize >= char_limit_native: 488 | max_itemsize = char_limit_native 489 | break 490 | if i_itemsize > max_itemsize: 491 | max_itemsize = i_itemsize 492 | 493 | dtype = npdtype("S" + str(max_itemsize)) 494 | converted = PyArray_NewFromDescr( 495 | &PyArray_Type, 496 | dtype, 497 | 1, 498 | &length, 499 | NULL, 500 | NULL, 501 | NPY_ARRAY_C_CONTIGUOUS, 502 | NULL, 503 | ) 504 | Py_INCREF(dtype) 505 | converted_data = PyArray_DATA(converted) 506 | 507 | for i in range(length): 508 | obj = data[i] 509 | head = converted_data + i * max_itemsize 510 | 511 | if obj == Py_None: 512 | i_itemsize = 4 513 | if i_itemsize > max_itemsize: 514 | i_itemsize = max_itemsize 515 | memcpy(head, b"None", i_itemsize) 516 | else: 517 | i_itemsize = PyUnicode_GET_LENGTH(obj) * PyUnicode_KIND(obj) 518 | if i_itemsize > max_itemsize: 519 | i_itemsize = max_itemsize 520 | memcpy(head, PyUnicode_DATA(obj), i_itemsize) 521 | memset(head + i_itemsize, 0, max_itemsize - i_itemsize) 522 | 523 | return converted 524 | -------------------------------------------------------------------------------- /2023/pyx/sentry_native.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -std=c++17 5 | # distutils: libraries = sentry 6 | # distutils: runtime_library_dirs = /usr/local/lib 7 | 8 | 9 | cdef extern from "sentry.h" nogil: 10 | struct sentry_options_s 11 | ctypedef sentry_options_s sentry_options_t 12 | sentry_options_t *sentry_options_new() 13 | void sentry_options_set_dsn(sentry_options_t *opts, const char *dsn) 14 | void sentry_options_set_release(sentry_options_t *opts, const char *release) 15 | void sentry_options_set_environment(sentry_options_t *opts, const char *environment) 16 | void sentry_options_set_debug(sentry_options_t *opts, int debug) 17 | void sentry_options_set_max_breadcrumbs(sentry_options_t *opts, size_t max_breadcrumbs) 18 | void sentry_options_set_handler_path(sentry_options_t *opts, const char *path) 19 | void sentry_options_set_symbolize_stacktraces(sentry_options_t *opts, int val) 20 | int sentry_init(sentry_options_t *options) 21 | int 
sentry_close() 22 | 23 | 24 | def init(str dsn not None, str release not None, str env not None) -> None: 25 | cdef: 26 | sentry_options_t *options = sentry_options_new() 27 | sentry_options_set_dsn(options, dsn.encode()) 28 | sentry_options_set_release(options, release.encode()) 29 | sentry_options_set_environment(options, env.encode()) 30 | sentry_options_set_debug(options, env != "production") 31 | sentry_options_set_handler_path(options, b"/usr/local/bin/crashpad_handler") 32 | sentry_options_set_symbolize_stacktraces(options, 1) 33 | sentry_options_set_max_breadcrumbs(options, 20) 34 | sentry_init(options) 35 | 36 | 37 | def fini(): 38 | sentry_close() 39 | -------------------------------------------------------------------------------- /2023/pyx/sorted_ops.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -mavx2 -ftree-vectorize -std=c++17 5 | 6 | from cpython cimport Py_INCREF, PyObject 7 | from libc.stdint cimport int64_t, uint32_t 8 | from numpy cimport ( 9 | NPY_ARRAY_C_CONTIGUOUS, 10 | NPY_INT64, 11 | NPY_UINT, 12 | PyArray_DATA, 13 | PyArray_DescrFromType, 14 | PyArray_DIM, 15 | PyArray_IS_C_CONTIGUOUS, 16 | PyArray_NDIM, 17 | dtype, 18 | import_array, 19 | ndarray, 20 | npy_intp, 21 | ) 22 | 23 | from athenian.api.native.cpython cimport PyUnicode_DATA 24 | from athenian.api.native.numpy cimport ( 25 | PyArray_Descr, 26 | PyArray_DESCR, 27 | PyArray_DescrNew, 28 | PyArray_NewFromDescr, 29 | PyArray_Type, 30 | ) 31 | 32 | 33 | cdef extern from "native/sorted_intersection.h" nogil: 34 | size_t intersect( 35 | const char *algorithm, 36 | const uint32_t *set1, 37 | const size_t length1, 38 | const uint32_t *set2, 39 | const size_t length2, 40 | uint32_t *out 41 | ) 42 | 43 | import_array() 44 | 45 | """ 46 | Benchmark results on production metrics: 47 | 48 | v1 49 | 4.79 ms ± 180 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 50 | v3 51 | 6.94 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 52 | simd 53 | 4.92 ms ± 293 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 54 | galloping 55 | 2.78 ms ± 30.5 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 56 | mut_part 57 | 3.35 ms ± 294 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 58 | scalar 59 | 3.21 ms ± 15.1 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 60 | simdgalloping 61 | 7.08 ms ± 93.6 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 62 | simd_avx2 63 | 4.6 ms ± 336 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 64 | v1_avx2 65 | 4.22 ms ± 143 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 66 | v3_avx2 67 | 10.5 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 68 | simdgalloping_avx2 69 | 10.7 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 70 | highlyscalable_intersect_SIMD 71 | 4.78 ms ± 340 µs per loop (mean ± std. dev. of 7 runs, 200 loops each) 72 | lemire_highlyscalable_intersect_SIMD 73 | 3.97 ms ± 387 µs per loop (mean ± std. dev. 
of 7 runs, 200 loops each) 74 | """ 75 | 76 | def sorted_intersect1d( 77 | ndarray arr1 not None, 78 | ndarray arr2 not None, 79 | str algo="galloping", 80 | ) -> ndarray: 81 | assert PyArray_NDIM(arr1) == 1 82 | assert PyArray_IS_C_CONTIGUOUS(arr1) 83 | assert PyArray_DESCR( arr1).kind == b"u" 84 | assert PyArray_NDIM(arr2) == 1 85 | assert PyArray_IS_C_CONTIGUOUS(arr2) 86 | assert PyArray_DESCR( arr2).kind == b"u" 87 | 88 | cdef: 89 | uint32_t *arr1_data = PyArray_DATA(arr1) 90 | uint32_t *arr2_data = PyArray_DATA(arr2) 91 | npy_intp len1 = PyArray_DIM(arr1, 0) 92 | npy_intp len2 = PyArray_DIM(arr2, 0) 93 | ndarray output 94 | dtype u32dtype = PyArray_DescrNew(PyArray_DescrFromType(NPY_UINT)) 95 | 96 | output = PyArray_NewFromDescr( 97 | &PyArray_Type, 98 | u32dtype, 99 | 1, 100 | &len1 if len1 > len2 else &len2, 101 | NULL, 102 | NULL, 103 | NPY_ARRAY_C_CONTIGUOUS, 104 | NULL, 105 | ) 106 | Py_INCREF(u32dtype) 107 | output.shape[0] = intersect( 108 | PyUnicode_DATA( algo), 109 | arr1_data, 110 | len1, 111 | arr2_data, 112 | len2, 113 | PyArray_DATA(output), 114 | ) 115 | return output 116 | 117 | 118 | cdef extern from "native/sum_repeated_with_step.h" nogil: 119 | void sum_repeated_with_step_avx2( 120 | const int64_t *src, 121 | int64_t src_len, 122 | int64_t repeats, 123 | int64_t step, 124 | int64_t *dst 125 | ) 126 | 127 | 128 | def sum_repeated_with_step(ndarray arr not None, long repeats, long step) -> ndarray: 129 | """ 130 | Calculate fused sum of wide range with repeated vector. 131 | 132 | np.repeat(arr[None, :], repeats, axis=0).ravel() + np.repeat( 133 | np.arange(repeats, dtype=int) * step, len(arr), 134 | ) 135 | """ 136 | assert PyArray_NDIM(arr) == 1 137 | assert PyArray_IS_C_CONTIGUOUS(arr) 138 | 139 | cdef: 140 | dtype arr_dtype = arr.dtype 141 | int64_t src_len = PyArray_DIM(arr, 0) 142 | npy_intp dst_len = src_len * repeats 143 | int64_t *src_data = PyArray_DATA(arr) 144 | 145 | assert arr_dtype.kind == b"i" 146 | output = PyArray_NewFromDescr( 147 | &PyArray_Type, 148 | arr_dtype, 149 | 1, 150 | &dst_len, 151 | NULL, 152 | NULL, 153 | NPY_ARRAY_C_CONTIGUOUS, 154 | NULL, 155 | ) 156 | Py_INCREF(arr_dtype) 157 | sum_repeated_with_step_avx2(src_data, src_len, repeats, step, PyArray_DATA(output)) 158 | return output 159 | 160 | 161 | def sorted_union1d(ndarray arr1 not None, ndarray arr2 not None) -> ndarray: 162 | assert PyArray_NDIM(arr1) == 1 163 | assert PyArray_NDIM(arr2) == 1 164 | 165 | if PyArray_DIM(arr1, 0) == 0: 166 | return arr2 167 | elif PyArray_DIM(arr2, 0) == 0: 168 | return arr1 169 | 170 | cdef: 171 | char kind1 = PyArray_DESCR( arr1).kind 172 | char kind2 = PyArray_DESCR( arr2).kind 173 | 174 | assert PyArray_IS_C_CONTIGUOUS(arr1) 175 | assert PyArray_IS_C_CONTIGUOUS(arr2) 176 | assert kind1 == kind2, "dtypes must match" 177 | 178 | if kind1 == b"i": 179 | return _sorted_union1d_i64(arr1, arr2) 180 | elif kind1 == b"u": 181 | return _sorted_union1d_u32(arr1, arr2) 182 | 183 | raise AssertionError(f"dtype is not supported: {arr1.dtype}") 184 | 185 | 186 | cdef ndarray _sorted_union1d_i64(ndarray arr1, ndarray arr2): 187 | cdef: 188 | npy_intp len1 = PyArray_DIM(arr1, 0) 189 | npy_intp len2 = PyArray_DIM(arr2, 0) 190 | int64_t *arr1_data = PyArray_DATA(arr1) 191 | int64_t *arr2_data = PyArray_DATA(arr2) 192 | npy_intp out_len = len1 + len2, pos1 = 0, pos2 = 0 193 | int64_t head1, head2 194 | ndarray output 195 | dtype i64dtype = PyArray_DescrNew(PyArray_DescrFromType(NPY_INT64)) 196 | int64_t *output_data 197 | 198 | output = PyArray_NewFromDescr( 199 | 
&PyArray_Type, 200 | i64dtype, 201 | 1, 202 | &out_len, 203 | NULL, 204 | NULL, 205 | NPY_ARRAY_C_CONTIGUOUS, 206 | NULL, 207 | ) 208 | Py_INCREF(i64dtype) 209 | output_data = PyArray_DATA(output) 210 | 211 | with nogil: 212 | head1 = arr1_data[0] 213 | head2 = arr2_data[0] 214 | out_len = 0 215 | while pos1 < len1 and pos2 < len2: 216 | if head1 < head2: 217 | output_data[out_len] = head1 218 | pos1 += 1 219 | head1 = arr1_data[pos1] 220 | elif head1 > head2: 221 | output_data[out_len] = head2 222 | pos2 += 1 223 | head2 = arr2_data[pos2] 224 | else: 225 | output_data[out_len] = head1 226 | pos1 += 1 227 | head1 = arr1_data[pos1] 228 | pos2 += 1 229 | head2 = arr2_data[pos2] 230 | out_len += 1 231 | if pos1 == len1: 232 | while pos2 < len2: 233 | output_data[out_len] = arr2_data[pos2] 234 | pos2 += 1 235 | out_len += 1 236 | else: 237 | while pos1 < len1: 238 | output_data[out_len] = arr1_data[pos1] 239 | pos1 += 1 240 | out_len += 1 241 | 242 | return output[:out_len] 243 | 244 | 245 | cdef ndarray _sorted_union1d_u32(ndarray arr1, ndarray arr2): 246 | cdef: 247 | npy_intp len1 = PyArray_DIM(arr1, 0) 248 | npy_intp len2 = PyArray_DIM(arr2, 0) 249 | uint32_t *arr1_data = PyArray_DATA(arr1) 250 | uint32_t *arr2_data = PyArray_DATA(arr2) 251 | npy_intp out_len = len1 + len2, pos1 = 0, pos2 = 0 252 | uint32_t head1, head2 253 | ndarray output 254 | dtype u32dtype = PyArray_DescrNew(PyArray_DescrFromType(NPY_UINT)) 255 | uint32_t *output_data 256 | 257 | output = PyArray_NewFromDescr( 258 | &PyArray_Type, 259 | u32dtype, 260 | 1, 261 | &out_len, 262 | NULL, 263 | NULL, 264 | NPY_ARRAY_C_CONTIGUOUS, 265 | NULL, 266 | ) 267 | Py_INCREF(u32dtype) 268 | output_data = PyArray_DATA(output) 269 | 270 | with nogil: 271 | head1 = arr1_data[0] 272 | head2 = arr2_data[0] 273 | out_len = 0 274 | while pos1 < len1 and pos2 < len2: 275 | if head1 < head2: 276 | output_data[out_len] = head1 277 | pos1 += 1 278 | head1 = arr1_data[pos1] 279 | elif head1 > head2: 280 | output_data[out_len] = head2 281 | pos2 += 1 282 | head2 = arr2_data[pos2] 283 | else: 284 | output_data[out_len] = head1 285 | pos1 += 1 286 | head1 = arr1_data[pos1] 287 | pos2 += 1 288 | head2 = arr2_data[pos2] 289 | out_len += 1 290 | if pos1 == len1: 291 | while pos2 < len2: 292 | output_data[out_len] = arr2_data[pos2] 293 | pos2 += 1 294 | out_len += 1 295 | else: 296 | while pos1 < len1: 297 | output_data[out_len] = arr1_data[pos1] 298 | pos1 += 1 299 | out_len += 1 300 | 301 | return output[:out_len] 302 | -------------------------------------------------------------------------------- /2023/pyx/sql_builders.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static inline int scan_unicode_kind(const char *data, long length) { 4 | __m256i word = _mm256_set1_epi8(0); 5 | long i; 6 | for (i = 0; i < length - 31; i += 32) { 7 | word = _mm256_or_si256(word, _mm256_loadu_si256((const __m256i *)(data + i))); 8 | } 9 | word = _mm256_cmpeq_epi8(word, _mm256_set1_epi8(0)); 10 | uint32_t mask = _mm256_movemask_epi8(word); 11 | mask = ~mask; 12 | for (; i < length; i += 4) { 13 | mask |= data[i] != 0; 14 | mask |= (data[i + 1] != 0) << 1; 15 | mask |= (data[i + 2] != 0) << 2; 16 | mask |= (data[i + 3] != 0) << 3; 17 | } 18 | if (mask & ((1 << 3) | (1 << 7) | (1 << 11) | (1 << 15) | (1 << 19) | (1 << 23) | (1 << 27) | (1 << 31))) { 19 | return 4; 20 | } 21 | if (mask & ((1 << 2) | (1 << 6) | (1 << 10) | (1 << 14) | (1 << 18) | (1 << 22) | (1 << 26) | (1 << 30))) { 22 | return 4; 23 | } 24 | if 
(mask & ((1 << 1) | (1 << 5) | (1 << 9) | (1 << 13) | (1 << 17) | (1 << 21) | (1 << 25) | (1 << 29))) { 25 | return 2; 26 | } 27 | return 1; 28 | } -------------------------------------------------------------------------------- /2023/pyx/sql_builders.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -mavx2 -ftree-vectorize 5 | 6 | from cpython cimport PyList_CheckExact, PyObject 7 | 8 | import cython 9 | 10 | from libc.stddef cimport wchar_t 11 | from libc.stdint cimport int64_t 12 | from libc.stdlib cimport lldiv, lldiv_t 13 | from libc.string cimport memchr, memcpy 14 | from numpy cimport ( 15 | PyArray_CheckExact, 16 | PyArray_DATA, 17 | PyArray_DESCR, 18 | PyArray_DIM, 19 | PyArray_STRIDE, 20 | dtype as np_dtype, 21 | import_array, 22 | ndarray, 23 | ) 24 | 25 | from athenian.api.native.cpython cimport ( 26 | Py_None, 27 | PyBytes_AS_STRING, 28 | PyBytes_Check, 29 | PyBytes_GET_SIZE, 30 | PyList_GET_ITEM, 31 | PyLong_AsLong, 32 | PyLong_CheckExact, 33 | PyObject_TypeCheck, 34 | PyUnicode_Check, 35 | PyUnicode_DATA, 36 | PyUnicode_FindChar, 37 | PyUnicode_GET_LENGTH, 38 | PyUnicode_KIND, 39 | PyUnicode_New, 40 | ) 41 | from athenian.api.native.numpy cimport PyArray_ScalarAsCtype, PyIntegerArrType_Type 42 | 43 | import_array() 44 | 45 | 46 | cdef extern from "wchar.h" nogil: 47 | wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n) 48 | 49 | 50 | cdef extern from "sql_builders.h" nogil: 51 | int scan_unicode_kind(const char *data, long length) 52 | 53 | 54 | def in_any_values_inline(values) -> str: 55 | if PyArray_CheckExact(values): 56 | return in_any_values_inline_array(values) 57 | if PyList_CheckExact(values): 58 | return in_any_values_inline_list(values) 59 | raise ValueError(f"Only numpy arrays and lists are supported, got {type(values)}") 60 | 61 | 62 | cdef str in_any_values_inline_array(ndarray values): 63 | if values.ndim != 1: 64 | raise ValueError(f"We support only 1-dimensional numpy arrays, got {values.ndim}") 65 | if values.dtype.kind not in ("S", "U", "i", "u"): 66 | raise ValueError(f"unsupported dtype {values.dtype}") 67 | if len(values) == 0: 68 | raise ValueError("= ANY(VALUES) is invalid syntax") 69 | cdef: 70 | np_dtype dtype = PyArray_DESCR(values) 71 | int is_s = dtype.kind == b"S" 72 | int is_str = is_s or dtype.kind == b"U" 73 | int stride = PyArray_STRIDE(values, 0) 74 | int itemsize = dtype.itemsize 75 | int length = PyArray_DIM(values, 0) 76 | int effective_itemsize = \ 77 | (itemsize if dtype.kind == b"S" else itemsize >> 2) + 2 \ 78 | if is_str \ 79 | else len(str(values.max())) 80 | Py_ssize_t size = (7 + (effective_itemsize + 3) * length - 1) 81 | result = PyUnicode_New(size, 255) 82 | char *buf = PyUnicode_DATA( result) 83 | char *data = PyArray_DATA(values) 84 | 85 | if is_str and itemsize == stride: 86 | if is_s: 87 | if memchr(data, b"'", length * itemsize) != NULL: 88 | raise NotImplementedError("One of the strings requires escaping the single quote") 89 | else: 90 | if wmemchr( data, b"'", length * itemsize >> 2) != NULL: 91 | raise NotImplementedError("One of the strings requires escaping the single quote") 92 | with nogil: 93 | if is_str: 94 | if is_s: 95 | if not _in_any_values_array_s(data, stride, itemsize, length, buf): 96 | raise NotImplementedError("One of the strings requires escaping the 
single quote") 97 | else: 98 | if not _in_any_values_array_u(data, stride, itemsize, length, buf): 99 | raise NotImplementedError("One of the strings requires escaping the single quote") 100 | elif itemsize == 8: 101 | _in_any_values_array_int64(data, stride, length, effective_itemsize, buf) 102 | else: 103 | raise ValueError(f"unsupported dtype {dtype}") 104 | return result 105 | 106 | 107 | cdef bint _in_any_values_array_s( 108 | const char *data, 109 | int stride, 110 | int itemsize, 111 | int length, 112 | char *output, 113 | ) nogil: 114 | cdef: 115 | int i, pos = 7 116 | char *quoteptr 117 | char *nullptr 118 | memcpy(output, b"VALUES ", 7) 119 | 120 | for i in range(length): 121 | output[pos] = b"(" 122 | pos += 1 123 | output[pos] = b"'" 124 | pos += 1 125 | memcpy(output + pos, data + stride * i, itemsize) 126 | if stride != itemsize and memchr(output + pos, b"'", itemsize) != NULL: 127 | return False 128 | pos += itemsize 129 | output[pos] = b"'" 130 | pos += 1 131 | output[pos] = b")" 132 | pos += 1 133 | if i < length - 1: 134 | output[pos] = b"," 135 | pos += 1 136 | 137 | nullptr = memchr(output, 0, pos) 138 | while nullptr: 139 | quoteptr = memchr(nullptr + 1, b"'", itemsize) 140 | nullptr[0] = b"'" 141 | for i in range(1, (quoteptr - nullptr) + 1): 142 | nullptr[i] = b" " 143 | nullptr = memchr(quoteptr, 0, pos - (quoteptr - output)) 144 | 145 | return True 146 | 147 | 148 | cdef bint _in_any_values_array_u( 149 | const char *data, 150 | int stride, 151 | int itemsize, 152 | int length, 153 | char *output, 154 | ) nogil: 155 | cdef: 156 | int i, j, fill, pos = 7 157 | char c 158 | memcpy(output, b"VALUES ", 7) 159 | 160 | for i in range(length): 161 | output[pos] = b"(" 162 | pos += 1 163 | output[pos] = b"'" 164 | pos += 1 165 | fill = False 166 | for j in range(0, itemsize, 4): 167 | c = data[stride * i + j] 168 | if stride != itemsize and c == b"'": 169 | return False 170 | if fill: 171 | c = b" " 172 | elif c == 0: 173 | c = b"'" 174 | fill = True 175 | output[pos] = c 176 | pos += 1 177 | output[pos] = b" " if fill else b"'" 178 | pos += 1 179 | output[pos] = b")" 180 | pos += 1 181 | if i < length - 1: 182 | output[pos] = b"," 183 | pos += 1 184 | 185 | return True 186 | 187 | 188 | cdef void _in_any_values_array_int64( 189 | const char *data, 190 | int stride, 191 | int length, 192 | int alignment, 193 | char *output, 194 | ) nogil: 195 | cdef: 196 | int i, pos = 7, valstart 197 | lldiv_t qr 198 | memcpy(output, b"VALUES ", 7) 199 | 200 | for i in range(length): 201 | output[pos] = b"(" 202 | pos += 1 203 | valstart = pos 204 | pos += alignment - 1 205 | qr.quot = ((data + i * stride))[0] 206 | while True: 207 | qr = lldiv(qr.quot, 10) 208 | output[pos] = (b'0') + (qr.rem) 209 | pos -= 1 210 | if qr.quot == 0: 211 | break 212 | while pos >= valstart: 213 | output[pos] = b" " 214 | pos -= 1 215 | pos = valstart + alignment 216 | output[pos] = b")" 217 | pos += 1 218 | if i < length - 1: 219 | output[pos] = b"," 220 | pos += 1 221 | 222 | 223 | cdef str in_any_values_inline_list(list values): 224 | cdef: 225 | Py_ssize_t i, length = len(values) 226 | PyObject *item 227 | bint with_null = length == 0 228 | 229 | for i in range(length): 230 | item = PyList_GET_ITEM( values, i) 231 | if item == Py_None: 232 | with_null = True 233 | continue 234 | if PyUnicode_Check(item): 235 | return _in_any_values_inline_list_u(values) 236 | elif PyBytes_Check(item): 237 | return _in_any_values_inline_list_s(values) 238 | elif PyLong_CheckExact(item) or PyObject_TypeCheck(item, 
&PyIntegerArrType_Type): 239 | return _in_any_values_inline_list_int64(values) 240 | else: 241 | raise ValueError(f"Unsupported type of list item #{i}: {type(values[i])}") 242 | 243 | assert with_null 244 | return "VALUES (null)" 245 | 246 | 247 | cdef str _in_any_values_inline_list_u(list values): 248 | cdef: 249 | Py_ssize_t i, j, length = len(values), size1 = 7 - 1, size2 = 0, size4 = 0, str_len, result_len 250 | int kind, effective_kind 251 | Py_UCS4 max_char 252 | str result 253 | char *output 254 | char *border 255 | char *str_data 256 | PyObject *item 257 | 258 | for i in range(length): 259 | item = PyList_GET_ITEM( values, i) 260 | if item == Py_None: 261 | continue 262 | if not PyUnicode_Check(item): 263 | raise ValueError(f"Mixed types in list: expected str, got {type(values[i])}") 264 | str_len = PyUnicode_GET_LENGTH(item) 265 | if PyUnicode_FindChar(item, ord(b"'"), 0, str_len, 1) >= 0: 266 | raise NotImplementedError("One of the strings requires escaping the single quote") 267 | kind = PyUnicode_KIND(item) 268 | if kind == 1: 269 | size1 += str_len 270 | elif kind == 2: 271 | size2 += str_len 272 | else: 273 | size4 += str_len 274 | size1 += 5 275 | if size4 > 0: 276 | effective_kind = 4 277 | elif size2 > 0: 278 | effective_kind = 2 279 | else: 280 | effective_kind = 1 281 | result_len = size1 + size2 + size4 282 | max_char = (1 << (8 * effective_kind) - 1) if effective_kind != 4 else 1114111 283 | result = PyUnicode_New(result_len, max_char) 284 | output = PyUnicode_DATA( result) 285 | with nogil: 286 | border = output + (result_len - 2) * effective_kind 287 | if effective_kind == 1: 288 | memcpy(output, b"VALUES ", 7) 289 | output += 7 290 | elif effective_kind == 2: 291 | memcpy(output, b"V\x00A\x00L\x00U\x00E\x00S\x00 \x00", 14) 292 | output += 14 293 | else: 294 | memcpy( 295 | output, 296 | b"V\x00\x00\x00A\x00\x00\x00L\x00\x00\x00U\x00\x00\x00E\x00\x00\x00S\x00\x00\x00 \x00\x00\x00", 297 | 28, 298 | ) 299 | output += 28 300 | 301 | for i in range(length): 302 | item = PyList_GET_ITEM( values, i) 303 | if item == Py_None: 304 | continue 305 | kind = PyUnicode_KIND(item) 306 | str_len = PyUnicode_GET_LENGTH(item) 307 | str_data = PyUnicode_DATA(item) 308 | if effective_kind == 1: 309 | memcpy(output, b"('", 2) 310 | output += 2 311 | memcpy(output, str_data, str_len) 312 | output += str_len 313 | str_len = 3 if output != border else 2 314 | memcpy(output, b"'),", str_len) 315 | output += str_len 316 | elif effective_kind == 2: 317 | memcpy(output, b"(\x00'\x00", 4) 318 | output += 4 319 | if kind == 1: 320 | for j in range(str_len): 321 | output[j * 2] = str_data[j] 322 | output[j * 2 + 1] = 0 323 | else: 324 | memcpy(output, str_data, str_len * 2) 325 | output += str_len * 2 326 | str_len = 6 if output != border else 4 327 | memcpy(output, b"'\x00)\x00,\x00", str_len) 328 | output += str_len 329 | else: 330 | memcpy(output, b"(\x00\x00\x00'\x00\x00\x00", 8) 331 | output += 8 332 | if kind == 1: 333 | for j in range(str_len): 334 | output[j * 4] = str_data[j] 335 | output[j * 4 + 1] = 0 336 | output[j * 4 + 2] = 0 337 | output[j * 4 + 3] = 0 338 | elif kind == 2: 339 | for j in range(str_len): 340 | output[j * 4] = str_data[j * 2] 341 | output[j * 4 + 1] = str_data[j * 2 + 1] 342 | output[j * 4 + 2] = 0 343 | output[j * 4 + 3] = 0 344 | else: 345 | memcpy(output, str_data, str_len * 4) 346 | output += str_len * 4 347 | str_len = 12 if output != border else 8 348 | memcpy(output, b"'\x00\x00\x00)\x00\x00\x00,\x00\x00\x00", str_len) 349 | output += str_len 350 | 351 | 
return result 352 | 353 | 354 | cdef str _in_any_values_inline_list_s(list values): 355 | cdef: 356 | Py_ssize_t i, length = len(values), str_len, result_len = 0 357 | str result 358 | char *output 359 | char *border 360 | PyObject *item 361 | 362 | for i in range(length): 363 | item = PyList_GET_ITEM( values, i) 364 | if item == Py_None: 365 | continue 366 | if not PyBytes_Check(item): 367 | raise ValueError(f"Mixed types in list: expected bytes, got {type(values[i])}") 368 | str_len = PyBytes_GET_SIZE(item) 369 | if memchr(PyBytes_AS_STRING(item), b"'", str_len) != NULL: 370 | raise NotImplementedError("One of the strings requires escaping the single quote") 371 | result_len += str_len + 5 372 | 373 | result_len += 7 - 1 374 | result = PyUnicode_New(result_len, 255) 375 | output = PyUnicode_DATA( result) 376 | with nogil: 377 | border = output + result_len - 2 378 | memcpy(output, b"VALUES ", 7) 379 | output += 7 380 | 381 | for i in range(length): 382 | item = PyList_GET_ITEM( values, i) 383 | if item == Py_None: 384 | continue 385 | str_len = PyBytes_GET_SIZE(item) 386 | str_data = PyBytes_AS_STRING(item) 387 | memcpy(output, b"('", 2) 388 | output += 2 389 | memcpy(output, str_data, str_len) 390 | output += str_len 391 | str_len = 3 if output != border else 2 392 | memcpy(output, b"'),", str_len) 393 | output += str_len 394 | 395 | return result 396 | 397 | 398 | @cython.cdivision(True) 399 | cdef str _in_any_values_inline_list_int64(list values): 400 | cdef: 401 | Py_ssize_t i, length = len(values), pos = 7, nulls = 0, result_len, valstart 402 | long val = 0, max_val = 0 403 | int digits = 0 404 | str result 405 | char *output 406 | lldiv_t qr 407 | PyObject *item 408 | 409 | for i in range(length): 410 | item = PyList_GET_ITEM( values, i) 411 | if item == Py_None: 412 | nulls += 1 413 | continue 414 | if PyLong_CheckExact(item): 415 | val = PyLong_AsLong(item) 416 | else: 417 | PyArray_ScalarAsCtype(item, &val) 418 | if val > max_val: 419 | max_val = val 420 | 421 | while max_val: 422 | max_val //= 10 423 | digits += 1 424 | 425 | result_len = 6 + 1 + (3 + digits) * (length - nulls) - 1 426 | result = PyUnicode_New(result_len, 255) 427 | output = PyUnicode_DATA( result) 428 | with nogil: 429 | memcpy(output, b"VALUES ", 7) 430 | for i in range(length): 431 | item = PyList_GET_ITEM( values, i) 432 | if item == Py_None: 433 | continue 434 | if PyLong_CheckExact(item): 435 | val = PyLong_AsLong(item) 436 | else: 437 | PyArray_ScalarAsCtype(item, &val) 438 | output[pos] = b"(" 439 | pos += 1 440 | valstart = pos 441 | pos += digits - 1 442 | qr.quot = val 443 | while True: 444 | qr = lldiv(qr.quot, 10) 445 | output[pos] = ( b'0') + ( qr.rem) 446 | pos -= 1 447 | if qr.quot == 0: 448 | break 449 | while pos >= valstart: 450 | output[pos] = b" " 451 | pos -= 1 452 | pos = valstart + digits 453 | output[pos] = b")" 454 | pos += 1 455 | if pos < result_len: 456 | output[pos] = b"," 457 | pos += 1 458 | return result 459 | 460 | 461 | def in_inline(values) -> str: 462 | if PyArray_CheckExact(values): 463 | return in_inline_array(values) 464 | if PyList_CheckExact(values): 465 | return in_inline_list(values) 466 | raise ValueError(f"Only numpy arrays and lists are supported, got {type(values)}") 467 | 468 | 469 | cdef str in_inline_array(ndarray values): 470 | if values.ndim != 1: 471 | raise ValueError(f"We support only 1-dimensional numpy arrays, got {values.ndim}") 472 | if values.dtype.kind not in ("S", "U", "i", "u"): 473 | raise ValueError(f"unsupported dtype {values.dtype}") 474 | if 
len(values) == 0: 475 | return "null" 476 | 477 | cdef: 478 | np_dtype dtype = PyArray_DESCR(values) 479 | int is_s = dtype.kind == b"S" 480 | int is_str = is_s or dtype.kind == b"U" 481 | int stride = PyArray_STRIDE(values, 0) 482 | int itemsize = dtype.itemsize 483 | int length = PyArray_DIM(values, 0) 484 | int effective_itemsize = ( 485 | (itemsize if is_s else (itemsize >> 2)) + 2 486 | ) if is_str else len(str(values.max())) 487 | Py_ssize_t size = (effective_itemsize + 1) * length - 1 488 | int kind = 1 489 | Py_UCS4 max_char 490 | char *data = PyArray_DATA(values) 491 | 492 | if is_str and not is_s: 493 | kind = scan_unicode_kind(data, length * itemsize) 494 | max_char = (1 << (8 * kind) - 1) if kind != 4 else 1114111 495 | else: 496 | max_char = 255 497 | 498 | cdef: 499 | result = PyUnicode_New(size, max_char) 500 | char *buf = PyUnicode_DATA( result) 501 | 502 | if is_str and itemsize == stride: 503 | if is_s: 504 | if memchr(data, b"'", length * itemsize) != NULL: 505 | raise NotImplementedError("One of the strings requires escaping the single quote") 506 | else: 507 | if wmemchr( data, b"'", length * itemsize >> 2) != NULL: 508 | raise NotImplementedError("One of the strings requires escaping the single quote") 509 | 510 | with nogil: 511 | if is_str: 512 | if is_s: 513 | if not _in_s(data, stride, itemsize, length, buf): 514 | raise NotImplementedError("One of the strings requires escaping the single quote") 515 | else: 516 | if not _in_u(data, stride, itemsize, length, kind, buf): 517 | raise NotImplementedError("One of the strings requires escaping the single quote") 518 | elif itemsize == 8: 519 | _in_int64(data, stride, length, effective_itemsize, buf) 520 | else: 521 | raise ValueError(f"unsupported dtype {dtype}") 522 | return result 523 | 524 | 525 | cdef bint _in_s(const char *data, 526 | int stride, 527 | int itemsize, 528 | int length, 529 | char *output) nogil: 530 | cdef: 531 | int i 532 | char *output_start = output 533 | char *quoteptr 534 | char *nullptr 535 | 536 | for i in range(length): 537 | output[0] = b"'" 538 | output += 1 539 | memcpy(output, data + stride * i, itemsize) 540 | if stride != itemsize and memchr(output, b"'", itemsize) != NULL: 541 | return False 542 | output += itemsize 543 | output[0] = b"'" 544 | output += 1 545 | if i < length - 1: 546 | output[0] = b"," 547 | output += 1 548 | 549 | nullptr = memchr(output_start, 0, output - output_start) 550 | while nullptr: 551 | quoteptr = memchr(nullptr + 1, b"'", itemsize) 552 | nullptr[0] = b"'" 553 | for i in range(1, (quoteptr - nullptr) + 1): 554 | nullptr[i] = b" " 555 | nullptr = memchr(quoteptr, 0, output - quoteptr) 556 | 557 | return True 558 | 559 | 560 | cdef bint _in_u(const char *data, 561 | int stride, 562 | int itemsize, 563 | int length, 564 | int kind, 565 | char *output) nogil: 566 | cdef: 567 | int i, j, offset, pad 568 | const char *quote = b"'\x00\x00\x00" 569 | const char *comma = b",\x00\x00\x00" 570 | char c 571 | 572 | for i in range(length): 573 | memcpy(output, quote, kind) 574 | output += kind 575 | offset = stride * i 576 | if stride != itemsize and wmemchr( data + offset, b"'", itemsize >> 2) != NULL: 577 | return False 578 | pad = itemsize 579 | if kind == 4: 580 | for j in range(0, itemsize, 4): 581 | c = data[offset + j] 582 | if c == 0: 583 | pad = j 584 | break 585 | output[j] = c 586 | output[j + 1] = data[offset + j + 1] 587 | output[j + 2] = data[offset + j + 2] 588 | output[j + 3] = data[offset + j + 3] 589 | output += pad 590 | elif kind == 2: 591 | for j 
in range(0, itemsize, 4): 592 | c = data[offset + j] 593 | if c == 0: 594 | pad = j 595 | break 596 | output[j >> 1] = c 597 | output[(j >> 1) + 1] = data[offset + j + 1] 598 | output += (pad >> 1) 599 | else: 600 | for j in range(0, itemsize, 4): 601 | c = data[offset + j] 602 | if c == 0: 603 | pad = j 604 | break 605 | output[j >> 2] = c 606 | output += (pad >> 2) 607 | memcpy(output, quote, kind) 608 | output += kind 609 | for j in range(pad, itemsize, 4): 610 | output[0] = b" " 611 | if kind > 1: 612 | output[1] = 0 613 | if kind > 2: 614 | output[2] = 0 615 | output[3] = 0 616 | output += kind 617 | if i < length - 1: 618 | memcpy(output, comma, kind) 619 | output += kind 620 | 621 | return True 622 | 623 | 624 | cdef void _in_int64(const char *data, 625 | int stride, 626 | int length, 627 | int alignment, 628 | char *output) nogil: 629 | cdef: 630 | int i, pos = 0, valstart 631 | lldiv_t qr 632 | 633 | for i in range(length): 634 | valstart = pos 635 | pos += alignment - 1 636 | qr.quot = ((data + i * stride))[0] 637 | while True: 638 | qr = lldiv(qr.quot, 10) 639 | output[pos] = (b'0') + (qr.rem) 640 | pos -= 1 641 | if qr.quot == 0: 642 | break 643 | while pos >= valstart: 644 | output[pos] = b" " 645 | pos -= 1 646 | pos = valstart + alignment 647 | if i < length - 1: 648 | output[pos] = b"," 649 | pos += 1 650 | 651 | 652 | cdef str in_inline_list(list values): 653 | cdef: 654 | Py_ssize_t i, length = len(values) 655 | PyObject *item 656 | bint with_null = length == 0 657 | 658 | for i in range(length): 659 | item = PyList_GET_ITEM( values, i) 660 | if item == Py_None: 661 | with_null = True 662 | continue 663 | if PyUnicode_Check(item): 664 | return _in_inline_list_u(values) 665 | elif PyBytes_Check(item): 666 | return _in_inline_list_s(values) 667 | elif PyLong_CheckExact(item) or PyObject_TypeCheck(item, &PyIntegerArrType_Type): 668 | return _in_inline_list_int64(values) 669 | else: 670 | raise ValueError(f"Unsupported type of list item #{i}: {type(values[i])}") 671 | 672 | assert with_null 673 | return "null" 674 | 675 | 676 | cdef str _in_inline_list_u(list values): 677 | cdef: 678 | Py_ssize_t i, j, length = len(values), size1 = -1, size2 = 0, size4 = 0, str_len, result_len 679 | int kind, effective_kind 680 | Py_UCS4 max_char 681 | str result 682 | char *output 683 | char *border 684 | char *str_data 685 | PyObject *item 686 | bint quote_found = False 687 | 688 | for i in range(length): 689 | item = PyList_GET_ITEM( values, i) 690 | if item == Py_None: 691 | continue 692 | if not PyUnicode_Check(item): 693 | raise ValueError(f"Mixed types in list: expected str, got {type(values[i])}") 694 | kind = PyUnicode_KIND(item) 695 | str_len = PyUnicode_GET_LENGTH(item) 696 | if kind == 1: 697 | size1 += str_len 698 | elif kind == 2: 699 | size2 += str_len 700 | else: 701 | size4 += str_len 702 | size1 += 3 703 | if size4 > 0: 704 | effective_kind = 4 705 | elif size2 > 0: 706 | effective_kind = 2 707 | else: 708 | effective_kind = 1 709 | result_len = size1 + size2 + size4 710 | max_char = (1 << (8 * effective_kind) - 1) if effective_kind != 4 else 1114111 711 | result = PyUnicode_New(result_len, max_char) 712 | output = PyUnicode_DATA( result) 713 | 714 | with nogil: 715 | border = output + (result_len - 1) * effective_kind 716 | 717 | for i in range(length): 718 | item = PyList_GET_ITEM( values, i) 719 | if item == Py_None: 720 | continue 721 | str_len = PyUnicode_GET_LENGTH(item) 722 | if PyUnicode_FindChar(item, ord(b"'"), 0, str_len, 1) >= 0: 723 | quote_found = True 724 | 
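                    # A raise is not allowed while the GIL is released;
                    # remember the failure, leave the nogil block, and raise
                    # NotImplementedError at the end of the function.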
break 725 | kind = PyUnicode_KIND(item) 726 | str_data = PyUnicode_DATA(item) 727 | if effective_kind == 1: 728 | output[0] = b"'" 729 | output += 1 730 | memcpy(output, str_data, str_len) 731 | output += str_len 732 | str_len = 2 if output != border else 1 733 | memcpy(output, b"',", str_len) 734 | output += str_len 735 | elif effective_kind == 2: 736 | memcpy(output, b"'\x00", 2) 737 | output += 2 738 | if kind == 1: 739 | for j in range(str_len): 740 | output[j * 2] = str_data[j] 741 | output[j * 2 + 1] = 0 742 | else: 743 | memcpy(output, str_data, str_len * 2) 744 | output += str_len * 2 745 | str_len = 4 if output != border else 2 746 | memcpy(output, b"'\x00,\x00", str_len) 747 | output += str_len 748 | else: 749 | memcpy(output, b"'\x00\x00\x00", 4) 750 | output += 4 751 | if kind == 1: 752 | for j in range(str_len): 753 | output[j * 4] = str_data[j] 754 | output[j * 4 + 1] = 0 755 | output[j * 4 + 2] = 0 756 | output[j * 4 + 3] = 0 757 | elif kind == 2: 758 | for j in range(str_len): 759 | output[j * 4] = str_data[j * 2] 760 | output[j * 4 + 1] = str_data[j * 2 + 1] 761 | output[j * 4 + 2] = 0 762 | output[j * 4 + 3] = 0 763 | else: 764 | memcpy(output, str_data, str_len * 4) 765 | output += str_len * 4 766 | str_len = 8 if output != border else 4 767 | memcpy(output, b"'\x00\x00\x00,\x00\x00\x00", str_len) 768 | output += str_len 769 | 770 | if quote_found: 771 | raise NotImplementedError("One of the strings requires escaping the single quote") 772 | return result 773 | 774 | 775 | cdef str _in_inline_list_s(list values): 776 | cdef: 777 | Py_ssize_t i, length = len(values), str_len, result_len = 0 778 | str result 779 | char *output 780 | char *border 781 | PyObject *item 782 | bint quote_found = False 783 | 784 | for i in range(length): 785 | item = PyList_GET_ITEM( values, i) 786 | if item == Py_None: 787 | continue 788 | if not PyBytes_Check(item): 789 | raise ValueError(f"Mixed types in list: expected bytes, got {type(values[i])}") 790 | result_len += PyBytes_GET_SIZE(item) + 3 791 | 792 | result_len -= 1 793 | result = PyUnicode_New(result_len, 255) 794 | output = PyUnicode_DATA( result) 795 | with nogil: 796 | border = output + result_len - 2 797 | 798 | for i in range(length): 799 | item = PyList_GET_ITEM( values, i) 800 | if item == Py_None: 801 | continue 802 | str_len = PyBytes_GET_SIZE(item) 803 | str_data = PyBytes_AS_STRING(item) 804 | output[0] = b"'" 805 | output += 1 806 | if memchr(str_data, b"'", str_len) != NULL: 807 | quote_found = True 808 | break 809 | memcpy(output, str_data, str_len) 810 | output += str_len 811 | str_len = 2 if output != border else 1 812 | memcpy(output, b"',", str_len) 813 | output += str_len 814 | 815 | if quote_found: 816 | raise NotImplementedError("One of the strings requires escaping the single quote") 817 | return result 818 | 819 | 820 | @cython.cdivision(True) 821 | cdef str _in_inline_list_int64(list values): 822 | cdef: 823 | Py_ssize_t i, length = len(values), pos = 0, nulls = 0, result_len, valstart 824 | long val = 0, max_val = 0 825 | int digits = 0 826 | str result 827 | char *output 828 | lldiv_t qr 829 | PyObject *item 830 | 831 | for i in range(length): 832 | item = PyList_GET_ITEM( values, i) 833 | if item == Py_None: 834 | nulls += 1 835 | continue 836 | if PyLong_CheckExact(item): 837 | val = PyLong_AsLong(item) 838 | else: 839 | PyArray_ScalarAsCtype(item, &val) 840 | if val > max_val: 841 | max_val = val 842 | 843 | while max_val: 844 | max_val //= 10 845 | digits += 1 846 | 847 | result_len = (1 + digits) * 
(length - nulls) - 1 848 | result = PyUnicode_New(result_len, 255) 849 | output = PyUnicode_DATA( result) 850 | with nogil: 851 | for i in range(length): 852 | item = PyList_GET_ITEM( values, i) 853 | if item == Py_None: 854 | continue 855 | if PyLong_CheckExact(item): 856 | val = PyLong_AsLong(item) 857 | else: 858 | PyArray_ScalarAsCtype(item, &val) 859 | valstart = pos 860 | pos += digits - 1 861 | qr.quot = val 862 | while True: 863 | qr = lldiv(qr.quot, 10) 864 | output[pos] = ( b'0') + ( qr.rem) 865 | pos -= 1 866 | if qr.quot == 0: 867 | break 868 | while pos >= valstart: 869 | output[pos] = b" " 870 | pos -= 1 871 | pos = valstart + digits 872 | if pos < result_len: 873 | output[pos] = b"," 874 | pos += 1 875 | return result 876 | -------------------------------------------------------------------------------- /2023/pyx/types_accelerated.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -std=c++17 5 | # distutils: libraries = mimalloc 6 | # distutils: runtime_library_dirs = /usr/local/lib 7 | 8 | from cpython cimport PyDict_GetItem, PyDict_New, PyObject 9 | from cpython.memoryview cimport PyMemoryView_Check, PyMemoryView_GET_BUFFER 10 | from cython.operator cimport dereference, postincrement 11 | from libc.stdint cimport int64_t 12 | 13 | from athenian.api.native.cpython cimport ( 14 | Py_INCREF, 15 | Py_None, 16 | Py_TYPE, 17 | PyByteArray_AS_STRING, 18 | PyByteArray_CheckExact, 19 | PyBytes_AS_STRING, 20 | PyBytes_Check, 21 | PyLong_AsLong, 22 | PyMemberDef, 23 | PyTypeObject, 24 | ) 25 | from athenian.api.native.mi_heap_destroy_stl_allocator cimport ( 26 | mi_heap_allocator_from_capsule, 27 | mi_heap_destroy_stl_allocator, 28 | mi_unordered_set, 29 | ) 30 | from athenian.api.native.numpy cimport ( 31 | NPY_FR_s, 32 | PyArray_DATA, 33 | PyArray_DIM, 34 | PyDatetimeScalarObject, 35 | npy_intp, 36 | ) 37 | from athenian.api.native.optional cimport optional 38 | 39 | from medvedi import DataFrame 40 | 41 | from athenian.api.internal.miners.participation import PRParticipationKind 42 | from athenian.api.models.metadata.github import ( 43 | PullRequest, 44 | PullRequestComment, 45 | PullRequestCommit, 46 | PullRequestReview, 47 | Release, 48 | ) 49 | 50 | pr_user_node_id_col = PullRequest.user_node_id.name 51 | pr_merged_by_id_col = PullRequest.merged_by_id.name 52 | release_author_node_id_col = Release.author_node_id.name 53 | review_user_node_id_col = PullRequestReview.user_node_id.name 54 | comment_user_node_id_col = PullRequestComment.user_node_id.name 55 | commit_committer_user_id_col = PullRequestCommit.committer_user_id.name 56 | commit_author_user_id_col = PullRequestCommit.author_user_id.name 57 | values_attr = "values" 58 | 59 | # these must be Python integers 60 | PRParticipationKind_AUTHOR = PRParticipationKind.AUTHOR 61 | PRParticipationKind_REVIEWER = PRParticipationKind.REVIEWER 62 | PRParticipationKind_COMMENTER = PRParticipationKind.COMMENTER 63 | PRParticipationKind_COMMIT_AUTHOR = PRParticipationKind.COMMIT_AUTHOR 64 | PRParticipationKind_COMMIT_COMMITTER = PRParticipationKind.COMMIT_COMMITTER 65 | PRParticipationKind_MERGER = PRParticipationKind.MERGER 66 | PRParticipationKind_RELEASER = PRParticipationKind.RELEASER 67 | 68 | 69 | cdef enum MinedPullRequestFields: 70 | MinedPullRequest_check_run = 0 71 | 
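    # These indexes mirror the alphabetical order of MinedPullRequest's slots:
    # extract_participant_nodes() below reads tp_members[index].offset
    # directly, so this enum must stay in sync with the model definition.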
MinedPullRequest_comments = 1 72 | MinedPullRequest_commits = 2 73 | MinedPullRequest_deployments = 3 74 | MinedPullRequest_jiras = 4 75 | MinedPullRequest_labels = 5 76 | MinedPullRequest_pr = 6 77 | MinedPullRequest_release = 7 78 | MinedPullRequest_review_comments = 8 79 | MinedPullRequest_review_requests = 9 80 | MinedPullRequest_reviews = 10 81 | 82 | 83 | cdef : 84 | set empty_set = set() 85 | Py_ssize_t df_columns_offset = ( DataFrame).tp_members[0].offset 86 | 87 | 88 | def extract_participant_nodes(mpr, alloc_capsule=None) -> dict: 89 | # the slot indexes correspond to the alphabetical order of fields 90 | cdef: 91 | optional[mi_heap_destroy_stl_allocator[char]] alloc 92 | optional[mi_unordered_set[int64_t]] boilerplate 93 | PyMemberDef *mpr_slots = Py_TYPE( mpr).tp_members 94 | PyObject *pr = dereference( 95 | (( mpr) + mpr_slots[MinedPullRequest_pr].offset) 96 | ) 97 | PyObject *commits = dereference( 98 | (( mpr) + mpr_slots[MinedPullRequest_commits].offset) 99 | ) 100 | PyObject *commits_dict = dereference( 101 | (( commits) + df_columns_offset) 102 | ) 103 | PyObject *commit_committers = PyDict_GetItem( 104 | commits_dict, commit_committer_user_id_col, 105 | ) 106 | PyObject *commit_authors = PyDict_GetItem( 107 | commits_dict, commit_author_user_id_col, 108 | ) 109 | 110 | PyObject *reviews = PyDict_GetItem( 111 | dereference( 112 | (( dereference( 113 | (( mpr) + mpr_slots[MinedPullRequest_reviews].offset) 114 | )) + df_columns_offset) 115 | ), 116 | review_user_node_id_col, 117 | ) 118 | PyObject *comments = PyDict_GetItem( 119 | dereference( 120 | (( dereference( 121 | (( mpr) + mpr_slots[MinedPullRequest_comments].offset) 122 | )) + df_columns_offset) 123 | ), 124 | comment_user_node_id_col, 125 | ) 126 | PyObject *release = dereference( 127 | (( mpr) + mpr_slots[MinedPullRequest_release].offset) 128 | ) 129 | PyObject *author = PyDict_GetItem( pr, pr_user_node_id_col) 130 | PyObject *merger = PyDict_GetItem( pr, pr_merged_by_id_col) 131 | PyObject *releaser = PyDict_GetItem( 132 | dereference( 133 | (( mpr) + mpr_slots[MinedPullRequest_release].offset) 134 | ), 135 | release_author_node_id_col, 136 | ) 137 | dict participants = PyDict_New() 138 | set py_boilerplate 139 | int64_t *data 140 | npy_intp i 141 | mi_unordered_set[int64_t].const_iterator it 142 | 143 | if merger == Py_None or PyLong_AsLong(merger) == 0: 144 | participants[PRParticipationKind_MERGER] = empty_set 145 | else: 146 | Py_INCREF(merger) 147 | participants[PRParticipationKind_MERGER] = { merger} 148 | 149 | if releaser == Py_None or PyLong_AsLong(releaser) == 0: 150 | participants[PRParticipationKind_RELEASER] = empty_set 151 | else: 152 | Py_INCREF(releaser) 153 | participants[PRParticipationKind_RELEASER] = { releaser} 154 | 155 | if alloc_capsule is not None: 156 | alloc.emplace(dereference(mi_heap_allocator_from_capsule(alloc_capsule))) 157 | else: 158 | alloc.emplace() 159 | boilerplate.emplace(dereference(alloc)) 160 | 161 | data = PyArray_DATA(reviews) 162 | for i in range(PyArray_DIM(reviews, 0)): 163 | dereference(boilerplate).emplace(data[i]) 164 | dereference(boilerplate).erase(0) 165 | 166 | if author == Py_None or PyLong_AsLong(author) == 0: 167 | participants[PRParticipationKind_AUTHOR] = empty_set 168 | else: 169 | Py_INCREF(author) 170 | participants[PRParticipationKind_AUTHOR] = { author} 171 | dereference(boilerplate).erase(PyLong_AsLong(author)) 172 | 173 | py_boilerplate = set() 174 | it = dereference(boilerplate).const_begin() 175 | while it != 
dereference(boilerplate).const_end(): 176 | py_boilerplate.add(dereference(it)) 177 | postincrement(it) 178 | participants[PRParticipationKind_REVIEWER] = py_boilerplate 179 | 180 | dereference(boilerplate).clear() 181 | data = PyArray_DATA(comments) 182 | for i in range(PyArray_DIM(comments, 0)): 183 | dereference(boilerplate).emplace(data[i]) 184 | dereference(boilerplate).erase(0) 185 | py_boilerplate = set() 186 | it = dereference(boilerplate).const_begin() 187 | while it != dereference(boilerplate).const_end(): 188 | py_boilerplate.add(dereference(it)) 189 | postincrement(it) 190 | participants[PRParticipationKind_COMMENTER] = py_boilerplate 191 | 192 | dereference(boilerplate).clear() 193 | data = PyArray_DATA(commit_committers) 194 | for i in range(PyArray_DIM(commit_committers, 0)): 195 | dereference(boilerplate).emplace(data[i]) 196 | dereference(boilerplate).erase(0) 197 | py_boilerplate = set() 198 | it = dereference(boilerplate).const_begin() 199 | while it != dereference(boilerplate).const_end(): 200 | py_boilerplate.add(dereference(it)) 201 | postincrement(it) 202 | participants[PRParticipationKind_COMMIT_COMMITTER] = py_boilerplate 203 | 204 | dereference(boilerplate).clear() 205 | data = PyArray_DATA(commit_authors) 206 | for i in range(PyArray_DIM(commit_authors, 0)): 207 | dereference(boilerplate).emplace(data[i]) 208 | dereference(boilerplate).erase(0) 209 | py_boilerplate = set() 210 | it = dereference(boilerplate).const_begin() 211 | while it != dereference(boilerplate).const_end(): 212 | py_boilerplate.add(dereference(it)) 213 | postincrement(it) 214 | participants[PRParticipationKind_COMMIT_AUTHOR] = py_boilerplate 215 | 216 | return participants 217 | 218 | 219 | # for field_name, (field_dtype, _) in self.dtype.fields.items(): 220 | # if np.issubdtype(field_dtype, np.datetime64): 221 | # if (dt := getattr(self, field_name)) is not None and dt >= after_dt: 222 | # changed.append(field_name) 223 | 224 | def find_truncated_datetime(facts, offsets, time_from) -> list: 225 | cdef: 226 | list result = [] 227 | int64_t time_from_i64 = ( time_from).obval 228 | PyMemberDef *slots = Py_TYPE( facts).tp_members 229 | PyObject *data_obj = dereference( 230 | (( facts) + slots[1].offset) 231 | ) 232 | int64_t *offsets_i64 = PyArray_DATA( offsets) 233 | npy_intp size = PyArray_DIM( offsets, 0), i 234 | char *data 235 | 236 | if PyBytes_Check(data_obj): 237 | data = PyBytes_AS_STRING(data_obj) 238 | elif PyMemoryView_Check( data_obj): 239 | data = PyMemoryView_GET_BUFFER( data_obj).buf 240 | elif PyByteArray_CheckExact(data_obj): 241 | data = PyByteArray_AS_STRING(data_obj) 242 | else: 243 | raise AssertionError(f"unsupported buffer type: {type(facts.data)}") 244 | 245 | assert ( time_from).obmeta.base == NPY_FR_s 246 | for i in range(size): 247 | if dereference((data + offsets_i64[i])) >= time_from_i64: 248 | result.append(i) 249 | return result 250 | -------------------------------------------------------------------------------- /2023/pyx/unordered_unique.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -mavx2 -ftree-vectorize -std=c++17 5 | # distutils: libraries = mimalloc 6 | # distutils: runtime_library_dirs = /usr/local/lib 7 | 8 | import cython 9 | 10 | from cpython cimport PyObject 11 | from cython.operator cimport 
dereference as deref 12 | from libc.stddef cimport wchar_t 13 | from libc.stdint cimport int32_t, int64_t 14 | from libc.string cimport memchr, memcpy 15 | from numpy cimport ( 16 | PyArray_DATA, 17 | PyArray_DESCR, 18 | PyArray_DIM, 19 | PyArray_IS_C_CONTIGUOUS, 20 | PyArray_NDIM, 21 | PyArray_STRIDE, 22 | dtype as np_dtype, 23 | ndarray, 24 | ) 25 | 26 | from athenian.api.internal.miners.github.dag_accelerated import searchsorted_inrange 27 | from athenian.api.native.cpython cimport ( 28 | Py_INCREF, 29 | Py_None, 30 | PyUnicode_DATA, 31 | PyUnicode_GET_LENGTH, 32 | PyUnicode_KIND, 33 | ) 34 | from athenian.api.native.mi_heap_destroy_stl_allocator cimport ( 35 | mi_heap_destroy_stl_allocator, 36 | mi_unordered_map, 37 | mi_unordered_set, 38 | pair, 39 | ) 40 | from athenian.api.native.optional cimport optional 41 | from athenian.api.native.string_view cimport string_view 42 | 43 | import numpy as np 44 | 45 | 46 | cdef extern from "wchar.h" nogil: 47 | wchar_t *wmemchr(const wchar_t *, wchar_t, size_t) 48 | 49 | 50 | def unordered_unique(ndarray arr not None) -> np.ndarray: 51 | cdef: 52 | np_dtype dtype = PyArray_DESCR(arr) 53 | assert PyArray_NDIM(arr) == 1 54 | assert PyArray_IS_C_CONTIGUOUS(arr) 55 | if dtype.kind == b"S" or dtype.kind == b"U": 56 | return _unordered_unique_str(arr, dtype) 57 | elif dtype.kind == b"i" or dtype.kind == b"u": 58 | if dtype.itemsize == 8: 59 | return _unordered_unique_int[int64_t](arr, dtype, 0) 60 | elif dtype.itemsize == 4: 61 | return _unordered_unique_int[int64_t](arr, dtype, 4) 62 | else: 63 | raise AssertionError(f"dtype {dtype} is not supported") 64 | elif dtype.kind == b"O": 65 | return _unordered_unique_pystr(arr) 66 | else: 67 | raise AssertionError(f"dtype {dtype} is not supported") 68 | 69 | 70 | @cython.cdivision(True) 71 | cdef ndarray _unordered_unique_pystr(ndarray arr): 72 | cdef: 73 | PyObject **data_in = PyArray_DATA(arr) 74 | PyObject **data_out 75 | PyObject *str_obj 76 | char *str_data 77 | unsigned int str_kind 78 | Py_ssize_t str_len 79 | int64_t i, \ 80 | length = PyArray_DIM(arr, 0), \ 81 | stride = PyArray_STRIDE(arr, 0) >> 3 82 | optional[mi_heap_destroy_stl_allocator[char]] alloc 83 | optional[mi_unordered_map[string_view, int64_t]] hashtable 84 | pair[string_view, int64_t] it 85 | ndarray result 86 | 87 | with nogil: 88 | alloc.emplace() 89 | hashtable.emplace(deref(alloc)) 90 | deref(hashtable).reserve(length // 16) 91 | for i in range(length): 92 | str_obj = data_in[i * stride] 93 | if str_obj == Py_None: 94 | continue 95 | str_data = PyUnicode_DATA(str_obj) 96 | str_len = PyUnicode_GET_LENGTH(str_obj) 97 | str_kind = PyUnicode_KIND(str_obj) 98 | deref(hashtable).try_emplace(string_view(str_data, str_len * str_kind), i) 99 | 100 | result = np.empty(deref(hashtable).size(), dtype=object) 101 | data_out = PyArray_DATA(result) 102 | i = 0 103 | for it in deref(hashtable): 104 | str_obj = data_in[it.second] 105 | data_out[i] = str_obj 106 | Py_INCREF(str_obj) 107 | i += 1 108 | return result 109 | 110 | 111 | @cython.cdivision(True) 112 | cdef ndarray _unordered_unique_str(ndarray arr, np_dtype dtype): 113 | cdef: 114 | char *data = PyArray_DATA(arr) 115 | int64_t i, \ 116 | itemsize = dtype.itemsize, \ 117 | length = PyArray_DIM(arr, 0), \ 118 | stride = PyArray_STRIDE(arr, 0) 119 | optional[mi_heap_destroy_stl_allocator[string_view]] alloc 120 | optional[mi_unordered_set[string_view]] hashtable 121 | string_view it 122 | ndarray result 123 | 124 | with nogil: 125 | alloc.emplace() 126 | hashtable.emplace(deref(alloc)) 127 | 
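        # Reserving length // 16 buckets is a heuristic: cheap when the input
        # is dominated by duplicates, and at worst it costs a few rehashes.
        # Hedged usage sketch (the order of the result is unspecified):
        #   unordered_unique(np.array([b"a", b"b", b"a"], dtype="S1"))
        #   -> np.array([b"a", b"b"], dtype="S1"), up to a permutation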
deref(hashtable).reserve(length // 16) 128 | for i in range(length): 129 | deref(hashtable).emplace(data + i * stride, itemsize) 130 | 131 | result = np.empty(deref(hashtable).size(), dtype=dtype) 132 | 133 | with nogil: 134 | data = PyArray_DATA(result) 135 | i = 0 136 | for it in deref(hashtable): 137 | memcpy(data + i * itemsize, it.data(), itemsize) 138 | i += 1 139 | return result 140 | 141 | 142 | ctypedef fused varint: 143 | int64_t 144 | int32_t 145 | 146 | 147 | @cython.cdivision(True) 148 | cdef ndarray _unordered_unique_int(ndarray arr, np_dtype dtype, varint _): 149 | cdef: 150 | char *data = PyArray_DATA(arr) 151 | int64_t i, \ 152 | itemsize = dtype.itemsize, \ 153 | length = PyArray_DIM(arr, 0), \ 154 | stride = PyArray_STRIDE(arr, 0) 155 | optional[mi_heap_destroy_stl_allocator[varint]] alloc 156 | optional[mi_unordered_set[varint]] hashtable 157 | varint it 158 | ndarray result 159 | 160 | with nogil: 161 | alloc.emplace() 162 | hashtable.emplace(deref(alloc)) 163 | deref(hashtable).reserve(length // 16) 164 | for i in range(length): 165 | deref(hashtable).emplace(((data + i * stride))[0]) 166 | 167 | result = np.empty(deref(hashtable).size(), dtype=dtype) 168 | 169 | with nogil: 170 | data = PyArray_DATA(result) 171 | i = 0 172 | for it in deref(hashtable): 173 | ((data + i * itemsize))[0] = it 174 | i += 1 175 | return result 176 | 177 | 178 | def in1d_str( 179 | ndarray trial not None, 180 | ndarray dictionary not None, 181 | bint skip_leading_zeros = False, 182 | ) -> np.ndarray: 183 | cdef: 184 | np_dtype dtype_trial = PyArray_DESCR(trial) 185 | np_dtype dtype_dict = PyArray_DESCR(dictionary) 186 | assert PyArray_NDIM(trial) == 1 187 | assert PyArray_NDIM(dictionary) == 1 188 | assert dtype_trial.kind == b"S" or dtype_trial.kind == b"U" 189 | assert dtype_trial.kind == dtype_dict.kind 190 | return _in1d_str(trial, dictionary, dtype_trial.kind == b"S", skip_leading_zeros) 191 | 192 | 193 | cdef ndarray _in1d_str(ndarray trial, ndarray dictionary, bint is_char, int skip_leading_zeros): 194 | cdef: 195 | char *data_trial = PyArray_DATA(trial) 196 | char *data_dictionary = PyArray_DATA(dictionary) 197 | char *output 198 | char *s 199 | char *nullptr 200 | np_dtype dtype_trial = PyArray_DESCR(trial) 201 | np_dtype dtype_dict = PyArray_DESCR(dictionary) 202 | int64_t i, size, \ 203 | itemsize = dtype_dict.itemsize, \ 204 | length = PyArray_DIM(dictionary, 0), \ 205 | stride = PyArray_STRIDE(dictionary, 0) 206 | optional[mi_heap_destroy_stl_allocator[string_view]] alloc 207 | optional[mi_unordered_set[string_view]] hashtable 208 | mi_unordered_set[string_view].iterator end 209 | ndarray result 210 | 211 | with nogil: 212 | alloc.emplace() 213 | hashtable.emplace(deref(alloc)) 214 | deref(hashtable).reserve(length * 4) 215 | if is_char: 216 | for i in range(length): 217 | s = data_dictionary + i * stride 218 | nullptr = s 219 | if skip_leading_zeros: 220 | while nullptr < (s + itemsize) and nullptr[0] == 0: 221 | nullptr += 1 222 | nullptr = memchr(nullptr, 0, itemsize + (s - nullptr)) 223 | if nullptr: 224 | size = nullptr - s 225 | else: 226 | size = itemsize 227 | deref(hashtable).emplace(s, size) 228 | else: 229 | for i in range(length): 230 | s = data_dictionary + i * stride 231 | nullptr = wmemchr(s, 0, itemsize >> 2) 232 | if nullptr: 233 | size = nullptr - s 234 | else: 235 | size = itemsize 236 | deref(hashtable).emplace(s, size) 237 | 238 | itemsize = dtype_trial.itemsize 239 | length = PyArray_DIM(trial, 0) 240 | stride = PyArray_STRIDE(trial, 0) 241 | 242 | result 
= np.empty(length, dtype=bool) 243 | 244 | with nogil: 245 | output = PyArray_DATA(result) 246 | end = deref(hashtable).end() 247 | if is_char: 248 | for i in range(length): 249 | s = data_trial + i * stride 250 | nullptr = s 251 | if skip_leading_zeros: 252 | while nullptr < (s + itemsize) and nullptr[0] == 0: 253 | nullptr += 1 254 | nullptr = memchr(nullptr, 0, itemsize + (s - nullptr)) 255 | if nullptr: 256 | size = nullptr - s 257 | else: 258 | size = itemsize 259 | output[i] = deref(hashtable).find(string_view(s, size)) != end 260 | else: 261 | for i in range(length): 262 | s = data_trial + i * stride 263 | nullptr = wmemchr( s, 0, itemsize >> 2) 264 | if nullptr: 265 | size = nullptr - s 266 | else: 267 | size = itemsize 268 | output[i] = deref(hashtable).find(string_view(s, size)) != end 269 | return result 270 | 271 | 272 | def map_array_values( 273 | ndarray arr not None, 274 | ndarray map_keys not None, 275 | ndarray map_values not None, 276 | miss_value, 277 | ) -> np.ndarray: 278 | """Map the values in the array `arr` using the dictionary expressed by two arrays. 279 | 280 | `map_keys` and `map_values` must have the same length and together represent the translation 281 | array. 282 | `map_keys` must be sorted. 283 | Values in `arr` not found in `map_keys` will be mapped to `miss_value`, which datatype 284 | should be compatible with `map_values` datatype. 285 | """ 286 | cdef: 287 | ndarray found_keys_indexes, mapped, non_matching_keys 288 | 289 | assert len(map_keys) == len(map_values) 290 | if len(map_keys) == 0: 291 | return np.full(len(arr), miss_value) 292 | # indexes selecting from map_key in the same order as ar 293 | found_keys_indexes = searchsorted_inrange(map_keys, arr) 294 | mapped = map_values[found_keys_indexes] 295 | 296 | # found_keys_indexes will also have an index for ar elements not present in map_keys 297 | # these positions must be set to miss_value 298 | non_matching_keys = map_keys[found_keys_indexes] != arr 299 | mapped[non_matching_keys] = miss_value 300 | return mapped 301 | -------------------------------------------------------------------------------- /2023/pyx/utils_accelerated.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #define interleave_lo do { \ 6 | __m256i tlo = _mm256_permute4x64_epi64(s, 0b00010100); \ 7 | tlo = _mm256_unpacklo_epi8(tlo, zeros); \ 8 | if (step == 2) { \ 9 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step), tlo); \ 10 | } else { \ 11 | __m256i tlolo = _mm256_permute4x64_epi64(tlo, 0b00010100); \ 12 | tlolo = _mm256_unpacklo_epi8(tlolo, zeros); \ 13 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step), tlolo); \ 14 | __m256i tlohi = _mm256_permute4x64_epi64(tlo, 0b11101011); \ 15 | tlohi = _mm256_unpackhi_epi8(tlohi, zeros); \ 16 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step + 32), tlohi); \ 17 | } \ 18 | } while (false) 19 | 20 | #define interleave_hi do { \ 21 | __m256i thi = _mm256_permute4x64_epi64(s, 0b11101011); \ 22 | thi = _mm256_unpackhi_epi8(thi, zeros); \ 23 | if (step == 2) { \ 24 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step + 32), thi); \ 25 | } else { \ 26 | __m256i thilo = _mm256_permute4x64_epi64(thi, 0b00010100); \ 27 | thilo = _mm256_unpacklo_epi8(thilo, zeros); \ 28 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step + 64), thilo); \ 29 | __m256i thihi = _mm256_permute4x64_epi64(thi, 0b11101011); \ 30 | thihi = _mm256_unpackhi_epi8(thihi, zeros); \ 31 
| _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * step + 96), thihi); \ 32 | } \ 33 | } while (false) 34 | 35 | template <int step, typename = std::enable_if_t<step == 2 || step == 4>> 36 | void interleave_bytes(const char *__restrict__ src, ssize_t length, char *__restrict__ out) { 37 | const __m256i zeros = _mm256_setzero_si256(); 38 | __m256i s; 39 | ssize_t i; 40 | for (i = 0; i < length - 31; i += 32) { 41 | s = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i)); 42 | interleave_lo; 43 | interleave_hi; 44 | } 45 | if (i < length - 15) { 46 | const __m128i *head = reinterpret_cast<const __m128i *>(src + i); 47 | s = _mm256_loadu2_m128i(head, head); 48 | interleave_lo; 49 | i += 16; 50 | } 51 | if (step == 2) { 52 | for (; i < length - 3; i += 4) { 53 | uint64_t quad = *reinterpret_cast<const uint32_t *>(src + i); 54 | *reinterpret_cast<uint64_t *>(out + i * step) = ((quad & 0xFF000000) << 24) | ((quad & 0xFF0000) << 16) | ((quad & 0xFF00) << 8) | (quad & 0xFF); 55 | } 56 | for (; i < length; i++) { 57 | *reinterpret_cast<uint16_t *>(out + i * step) = static_cast<uint16_t>(reinterpret_cast<const uint8_t *>(src)[i]); 58 | } 59 | } else { 60 | for (; i < length - 1; i += 2) { 61 | uint64_t pair = *reinterpret_cast<const uint16_t *>(src + i); 62 | *reinterpret_cast<uint64_t *>(out + i * step) = ((pair & 0xFF00) << 24) | (pair & 0xFF); 63 | } 64 | if (i < length) { 65 | *reinterpret_cast<uint32_t *>(out + i * step) = static_cast<uint32_t>(reinterpret_cast<const uint8_t *>(src)[i]); 66 | } 67 | } 68 | } 69 | 70 | constexpr auto interleave_bytes2 = interleave_bytes<2>; 71 | constexpr auto interleave_bytes4 = interleave_bytes<4>; 72 | 73 | 74 | void interleave_bytes24(const char *__restrict__ src, ssize_t length, char *__restrict__ out) { 75 | const __m256i zeros = _mm256_setzero_si256(); 76 | __m256i s; 77 | ssize_t i; 78 | for (i = 0; i < length - 31; i += 32) { 79 | s = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i)); 80 | __m256i tlo = _mm256_permute4x64_epi64(s, 0b00010100); 81 | tlo = _mm256_unpacklo_epi16(tlo, zeros); 82 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * 2), tlo); 83 | __m256i thi = _mm256_permute4x64_epi64(s, 0b11101011); 84 | thi = _mm256_unpackhi_epi16(thi, zeros); 85 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * 2 + 32), thi); 86 | } 87 | if (i < length - 15) { 88 | const __m128i *head = reinterpret_cast<const __m128i *>(src + i); 89 | s = _mm256_loadu2_m128i(head, head); 90 | __m256i tlo = _mm256_permute4x64_epi64(s, 0b00010100); 91 | tlo = _mm256_unpacklo_epi16(tlo, zeros); 92 | _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + i * 2), tlo); 93 | i += 16; 94 | } 95 | for (; i < length - 3; i += 4) { 96 | uint64_t pair = *reinterpret_cast<const uint32_t *>(src + i); 97 | *reinterpret_cast<uint64_t *>(out + i * 2) = (pair & 0xFFFF) | ((pair & 0xFFFF0000) << 16); 98 | } 99 | if (i < length) { 100 | *reinterpret_cast<uint32_t *>(out + i * 2) = static_cast<uint32_t>(*reinterpret_cast<const uint16_t *>(src + i)); 101 | } 102 | } -------------------------------------------------------------------------------- /2023/pyx/utils_accelerated.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -std=c++17 -mavx2 5 | 6 | from typing import Any, Optional 7 | 8 | from athenian.api.internal.settings import ReleaseMatch, ReleaseSettings, default_branch_alias 9 | 10 | from cpython cimport PyObject 11 | from cpython.bytes cimport PyBytes_FromStringAndSize 12 | from cpython.dict cimport PyDict_GetItem 13 | from cpython.unicode cimport PyUnicode_GET_LENGTH 14 | from cython.operator cimport dereference as deref
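# interleave_bytes2/interleave_bytes4 (declared below from utils_accelerated.h)
# widen UCS1 text by inserting zero bytes, i.e. they turn latin-1 bytes into
# little-endian UCS2/UCS4 code units. A hedged pure-Python model of what
# interleave_bytes2 computes:
#   bytes(b"abc"[i // 2] if i % 2 == 0 else 0 for i in range(6)) == b"a\x00b\x00c\x00"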
15 | from libc.string cimport memchr, memcmp, memcpy 16 | from libcpp.memory cimport allocator, unique_ptr 17 | 18 | from athenian.api.native.cpython cimport PyUnicode_DATA, PyUnicode_KIND 19 | from athenian.api.native.mi_heap_destroy_stl_allocator cimport ( 20 | empty_deleter, 21 | mi_heap_destroy_stl_allocator, 22 | mi_unordered_map, 23 | mi_vector, 24 | ) 25 | from athenian.api.native.optional cimport optional 26 | from athenian.api.native.string_view cimport string_view 27 | 28 | 29 | cdef extern from "string.h" nogil: 30 | void *memmem( 31 | const void *haystack, size_t haystacklen, 32 | const void *needle, size_t needlelen, 33 | ) 34 | 35 | cdef extern from "utils_accelerated.h" nogil: 36 | void interleave_bytes2(const char *src, size_t length, char *out) 37 | void interleave_bytes4(const char *src, size_t length, char *out) 38 | void interleave_bytes24(const char *src, size_t length, char *out) 39 | 40 | cdef: 41 | mi_heap_destroy_stl_allocator[char] alloc 42 | optional[mi_vector[unique_ptr[char[], empty_deleter]]] _str_storage 43 | 44 | _str_storage.emplace(alloc) 45 | 46 | cdef: 47 | Py_ssize_t default_branch_alias_len = len(default_branch_alias) 48 | const char *default_branch_alias_data1 = PyUnicode_DATA( 49 | default_branch_alias 50 | ) 51 | char *default_branch_alias_data2 = deref(_str_storage).emplace_back(alloc.allocate(default_branch_alias_len * 2)).get() 52 | char *default_branch_alias_data4 = deref(_str_storage).emplace_back(alloc.allocate(default_branch_alias_len * 4)).get() 53 | long branch = ReleaseMatch.branch 54 | long tag = ReleaseMatch.tag 55 | long tag_or_branch = ReleaseMatch.tag_or_branch 56 | long event = ReleaseMatch.event 57 | const char *tag_or_branch_data = PyUnicode_DATA( ReleaseMatch.tag_or_branch.name) 58 | Py_ssize_t tag_or_branch_name_len = len(ReleaseMatch.tag_or_branch.name) 59 | const char *rejected_data = PyUnicode_DATA( ReleaseMatch.rejected.name) 60 | Py_ssize_t rejected_name_len = len(ReleaseMatch.rejected.name) 61 | const char *force_push_drop_data = PyUnicode_DATA( ReleaseMatch.force_push_drop.name) 62 | Py_ssize_t force_push_drop_name_len = len(ReleaseMatch.force_push_drop.name) 63 | optional[mi_unordered_map[string_view, long]] release_match_name_to_enum 64 | 65 | interleave_bytes2( 66 | default_branch_alias_data1, 67 | default_branch_alias_len, 68 | default_branch_alias_data2, 69 | ) 70 | interleave_bytes4( 71 | default_branch_alias_data1, 72 | default_branch_alias_len, 73 | default_branch_alias_data4, 74 | ) 75 | 76 | release_match_name_to_enum.emplace(alloc) 77 | for obj in ReleaseMatch: 78 | deref(release_match_name_to_enum)[string_view( 79 | PyUnicode_DATA( obj.name), PyUnicode_GET_LENGTH(obj.name) 80 | )] = obj 81 | deref(_str_storage).emplace_back(alloc.allocate(PyUnicode_GET_LENGTH(obj.name) * 2)) 82 | interleave_bytes2( 83 | PyUnicode_DATA( obj.name), 84 | PyUnicode_GET_LENGTH(obj.name), 85 | deref(_str_storage).back().get(), 86 | ) 87 | deref(release_match_name_to_enum)[string_view( 88 | deref(_str_storage).back().get(), PyUnicode_GET_LENGTH(obj.name) * 2 89 | )] = obj 90 | deref(_str_storage).emplace_back(alloc.allocate(PyUnicode_GET_LENGTH(obj.name) * 4)) 91 | interleave_bytes4( 92 | PyUnicode_DATA( obj.name), 93 | PyUnicode_GET_LENGTH(obj.name), 94 | deref(_str_storage).back().get(), 95 | ) 96 | deref(release_match_name_to_enum)[string_view( 97 | deref(_str_storage).back().get(), PyUnicode_GET_LENGTH(obj.name) * 4 98 | )] = obj 99 | 100 | 101 | def interleave_expand(bytes src, int srckind, int dstkind): 102 | """ 103 | srckind > 
dstkind 104 | dstkind=2: abcd -> a0b0c0d0 (ucs1 -> ucs2) 105 | srckind=1 dstkind=4: abcd -> a000b000c000d000 (ucs1 -> ucs4) 106 | srckind=2 dstkind=4: a0b0c0d0 -> a000b000c000d000 (ucs2 -> ucs4) 107 | """ 108 | assert srckind < dstkind 109 | cdef: 110 | bytes output = PyBytes_FromStringAndSize(NULL, len(src) << ((dstkind >> 1) - (srckind >> 1))) 111 | if dstkind == 2: 112 | interleave_bytes2(src, len(src), output) 113 | elif dstkind == 4: 114 | if srckind == 1: 115 | interleave_bytes4(src, len(src), output) 116 | else: 117 | interleave_bytes24(src, len(src), output) 118 | return output 119 | 120 | 121 | def triage_by_release_match( 122 | repo: str, 123 | release_match: str, 124 | release_settings: ReleaseSettings, 125 | default_branches: dict[str, str], 126 | result: Any, 127 | ambiguous: dict[str, Any], 128 | ) -> Optional[Any]: 129 | """Check the release match of the specified `repo` and return `None` if it is not effective \ 130 | according to `release_settings`, or decide between `result` and `ambiguous`.""" 131 | cdef: 132 | Py_ssize_t str_kind = PyUnicode_KIND( release_match) 133 | Py_ssize_t release_match_len = PyUnicode_GET_LENGTH(release_match) * str_kind 134 | const char *release_match_data = PyUnicode_DATA( release_match) 135 | PyObject *required_release_match 136 | const char *match_name 137 | int match_name_len 138 | const char *match_by 139 | int match_by_len 140 | long match 141 | long required_release_match_match 142 | const char *target_data 143 | Py_ssize_t target_len, target_kind 144 | PyObject *default_branch 145 | const char *default_branch_data 146 | Py_ssize_t default_branch_len, default_branch_kind, target_kind_shift 147 | const char *default_branch_alias_data 148 | const char *found 149 | unique_ptr[char] resolved_branch 150 | Py_ssize_t pos 151 | if ( 152 | ( 153 | release_match_len == rejected_name_len 154 | and memcmp(release_match_data, rejected_data, rejected_name_len) == 0 155 | ) 156 | or 157 | ( 158 | release_match_len == force_push_drop_name_len 159 | and memcmp(release_match_data, force_push_drop_data, force_push_drop_name_len) == 0 160 | ) 161 | ): 162 | return result 163 | 164 | required_release_match = PyDict_GetItem(release_settings.native, repo) 165 | if required_release_match == NULL: 166 | # DEV-1451: if we don't have this repository in the release settings, then it is deleted 167 | raise AssertionError( 168 | f"You must take care of deleted repositories separately: {repo}", 169 | ) from None 170 | match_name = memchr(release_match_data, ord(b"|"), release_match_len) 171 | if match_name == NULL: 172 | match_name_len = release_match_len 173 | match_by = release_match_data + release_match_len 174 | match_by_len = 0 175 | else: 176 | match_name_len = match_name - release_match_data 177 | match_by = match_name + str_kind 178 | match_by_len = release_match_len - match_name_len - str_kind 179 | match_name = release_match_data 180 | match = deref(release_match_name_to_enum)[string_view(match_name, match_name_len)] 181 | required_release_match_match = (required_release_match).match 182 | if required_release_match_match != tag_or_branch: 183 | if match != required_release_match_match: 184 | return None 185 | dump = result 186 | else: 187 | if memcmp(match_name, b"event", 5) == 0: 188 | return None 189 | match_name_len >>= (str_kind >> 1) 190 | dump = ambiguous[release_match[:match_name_len]] 191 | if match == tag: 192 | target = (required_release_match).tags 193 | elif match == branch: 194 | target = (required_release_match).branches 195 | elif match == 
event: 196 | target = (required_release_match).events 197 | else: 198 | raise AssertionError("Precomputed DB may not contain Match.tag_or_branch") 199 | target_data = PyUnicode_DATA( target) 200 | target_kind = PyUnicode_KIND( target) 201 | target_len = PyUnicode_GET_LENGTH(target) << (target_kind >> 1) 202 | if match == branch: 203 | if target_kind == 1: 204 | default_branch_alias_data = default_branch_alias_data1 205 | elif target_kind == 2: 206 | default_branch_alias_data = default_branch_alias_data2 207 | else: 208 | default_branch_alias_data = default_branch_alias_data4 209 | found = memmem( 210 | target_data, 211 | target_len, 212 | default_branch_alias_data, 213 | default_branch_alias_len << (target_kind >> 1), 214 | ) 215 | if found != NULL: 216 | target_len -= default_branch_alias_len * target_kind 217 | default_branch = PyDict_GetItem(default_branches, repo) 218 | default_branch_len = PyUnicode_GET_LENGTH( default_branch) 219 | default_branch_kind = PyUnicode_KIND(default_branch) 220 | if target_kind == default_branch_kind: 221 | target_kind_shift = 0 222 | target_len += default_branch_len << (default_branch_kind >> 1) 223 | elif target_kind > default_branch_kind: 224 | target_kind_shift = 0 225 | target_len += default_branch_len << ((target_kind >> 1) - (default_branch_kind >> 1)) 226 | else: 227 | target_kind_shift = (default_branch_kind >> 1) - (target_kind >> 1) 228 | target_len <<= target_kind_shift 229 | target_len += default_branch_len << (default_branch_kind >> 1) 230 | if target_len != match_by_len: 231 | return None 232 | default_branch_data = PyUnicode_DATA(default_branch) 233 | resolved_branch.reset(allocator[char]().allocate(target_len)) 234 | pos = found - target_data 235 | if target_kind >= default_branch_kind: 236 | memcpy(resolved_branch.get(), target_data, pos) 237 | elif target_kind == 1: 238 | if default_branch_kind == 2: 239 | interleave_bytes2(target_data, pos, resolved_branch.get()) 240 | else: 241 | interleave_bytes4(target_data, pos, resolved_branch.get()) 242 | else: 243 | # target_kind == 2 244 | interleave_bytes24(target_data, pos, resolved_branch.get()) 245 | pos <<= target_kind_shift 246 | if default_branch_kind >= target_kind: 247 | memcpy(resolved_branch.get() + pos, default_branch_data, default_branch_len << (default_branch_kind >> 1)) 248 | pos += default_branch_len << (default_branch_kind >> 1) 249 | else: 250 | if default_branch_kind == 1: 251 | if target_kind == 2: 252 | interleave_bytes2(default_branch_data, default_branch_len, resolved_branch.get() + pos) 253 | else: 254 | interleave_bytes4(default_branch_data, default_branch_len, resolved_branch.get() + pos) 255 | else: 256 | # default_branch_kind = 2 257 | interleave_bytes24(default_branch_data, default_branch_len * 2, resolved_branch.get() + pos) 258 | pos += default_branch_len << (target_kind >> 1) 259 | if target_kind >= default_branch_kind: 260 | memcpy(resolved_branch.get() + pos, found + default_branch_alias_len * target_kind, target_len - pos) 261 | elif target_kind == 1: 262 | if default_branch_kind == 2: 263 | interleave_bytes2(found + default_branch_alias_len, target_len - pos, resolved_branch.get() + pos) 264 | else: 265 | interleave_bytes4(found + default_branch_alias_len, target_len - pos, resolved_branch.get() + pos) 266 | else: 267 | # target_kind == 2 268 | interleave_bytes24(found + default_branch_alias_len * 2, (target_len - pos) >> 1, resolved_branch.get() + pos) 269 | target_data = resolved_branch.get() 270 | 271 | if target_len != match_by_len or memcmp(target_data, 
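        # Final gate: once the default branch alias has been substituted, the
        # configured target must byte-compare equal to the `match_by` payload
        # parsed out of the precomputed `release_match` string.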
match_by, match_by_len): 272 | return None 273 | return dump 274 | -------------------------------------------------------------------------------- /2023/pyx/web_model_io.h: -------------------------------------------------------------------------------- 1 | // copied private functions from numpy, licensed under BSD-3 2 | 3 | namespace { 4 | /* 5 | * Computes the python `ret, d = divmod(d, unit)`. 6 | * 7 | * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch 8 | * for subsequent calls to this command - it is able to deduce that `*d >= 0`. 9 | */ 10 | inline npy_int64 extract_unit_64(npy_int64 *d, npy_int64 unit) { 11 | assert(unit > 0); 12 | npy_int64 div = *d / unit; 13 | npy_int64 mod = *d % unit; 14 | if (mod < 0) { 15 | mod += unit; 16 | div -= 1; 17 | } 18 | assert(mod >= 0); 19 | *d = mod; 20 | return div; 21 | } 22 | 23 | inline int is_leapyear(npy_int64 year) { 24 | return (year & 0x3) == 0 && /* year % 4 == 0 */ 25 | ((year % 100) != 0 || 26 | (year % 400) == 0); 27 | } 28 | 29 | int _days_per_month_table[2][12] = { 30 | { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, 31 | { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, 32 | }; 33 | 34 | /* 35 | * Modifies '*days_' to be the day offset within the year, 36 | * and returns the year. 37 | */ 38 | inline npy_int64 days_to_yearsdays(npy_int64 *days_) { 39 | const npy_int64 days_per_400years = (400*365 + 100 - 4 + 1); 40 | /* Adjust so it's relative to the year 2000 (divisible by 400) */ 41 | npy_int64 days = (*days_) - (365*30 + 7); 42 | 43 | /* Break down the 400 year cycle to get the year and day within the year */ 44 | npy_int64 year = 400 * extract_unit_64(&days, days_per_400years); 45 | 46 | /* Work out the year/day within the 400 year cycle */ 47 | if (days >= 366) { 48 | year += 100 * ((days-1) / (100*365 + 25 - 1)); 49 | days = (days-1) % (100*365 + 25 - 1); 50 | if (days >= 365) { 51 | year += 4 * ((days+1) / (4*365 + 1)); 52 | days = (days+1) % (4*365 + 1); 53 | if (days >= 366) { 54 | year += (days-1) / 365; 55 | days = (days-1) % 365; 56 | } 57 | } 58 | } 59 | 60 | *days_ = days; 61 | return year + 2000; 62 | } 63 | 64 | /* 65 | * Fills in the year, month, day in 'dts' based on the days 66 | * offset from 1970. 
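 * Example: days = 0 yields 1970-01-01 and days = 59 yields 1970-03-01,
 * because 1970 is not a leap year (January 31 + February 28 = 59 days).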
67 | */ 68 | inline void set_datetimestruct_days(npy_int64 days, int *year, int *month, int *day) { 69 | *year = days_to_yearsdays(&days); 70 | int *month_lengths = _days_per_month_table[is_leapyear(*year)]; 71 | 72 | for (int i = 0; i < 12; ++i) { 73 | if (days < month_lengths[i]) { 74 | *month = i + 1; 75 | *day = (int)days + 1; 76 | return; 77 | } 78 | else { 79 | days -= month_lengths[i]; 80 | } 81 | } 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /2023/pyx/web_model_io.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=False 3 | # distutils: language = c++ 4 | # distutils: extra_compile_args = -mavx2 -ftree-vectorize -std=c++20 5 | # distutils: libraries = mimalloc 6 | # distutils: runtime_library_dirs = /usr/local/lib 7 | 8 | cimport cython 9 | 10 | from cython.operator import dereference 11 | 12 | from cpython cimport ( 13 | Py_INCREF, 14 | PyBytes_FromStringAndSize, 15 | PyDict_New, 16 | PyDict_SetItem, 17 | PyFloat_FromDouble, 18 | PyList_New, 19 | PyList_SET_ITEM, 20 | PyLong_FromLong, 21 | PyObject, 22 | PyTuple_New, 23 | PyTuple_SET_ITEM, 24 | ) 25 | from cpython.datetime cimport PyDateTimeAPI, import_datetime 26 | from cpython.dict cimport PyDict_GetItemString 27 | from libc.stdint cimport int32_t, uint8_t, uint16_t, uint32_t 28 | from libc.stdio cimport FILE, SEEK_CUR, fclose, fread, fseek, ftell 29 | from libc.string cimport memcpy, strlen 30 | from numpy cimport import_array, npy_int64 31 | 32 | from athenian.api.native.chunked_stream cimport chunked_stream 33 | from athenian.api.native.cpython cimport ( 34 | Py_False, 35 | Py_None, 36 | Py_True, 37 | Py_TYPE, 38 | PyBaseObject_Type, 39 | PyBool_Type, 40 | PyBytes_AS_STRING, 41 | PyBytes_Check, 42 | PyBytes_GET_SIZE, 43 | PyDateTime_CAPI, 44 | PyDateTime_Check, 45 | PyDateTime_DATE_GET_HOUR, 46 | PyDateTime_DATE_GET_MINUTE, 47 | PyDateTime_DATE_GET_SECOND, 48 | PyDateTime_DATE_GET_TZINFO, 49 | PyDateTime_DELTA_GET_DAYS, 50 | PyDateTime_DELTA_GET_SECONDS, 51 | PyDateTime_GET_DAY, 52 | PyDateTime_GET_MONTH, 53 | PyDateTime_GET_YEAR, 54 | PyDelta_Check, 55 | PyDict_CheckExact, 56 | PyDict_Next, 57 | PyDict_Size, 58 | PyDict_Type, 59 | PyFloat_AS_DOUBLE, 60 | PyFloat_CheckExact, 61 | PyFloat_Type, 62 | PyList_CheckExact, 63 | PyList_GET_ITEM, 64 | PyList_GET_SIZE, 65 | PyList_Type, 66 | PyLong_AsLong, 67 | PyLong_CheckExact, 68 | PyLong_Type, 69 | PyMemberDef, 70 | PyObject_TypeCheck, 71 | PyTuple_GET_ITEM, 72 | PyTypeObject, 73 | PyUnicode_1BYTE_KIND, 74 | PyUnicode_2BYTE_KIND, 75 | PyUnicode_4BYTE_KIND, 76 | PyUnicode_Check, 77 | PyUnicode_DATA, 78 | PyUnicode_FromKindAndData, 79 | PyUnicode_FromString, 80 | PyUnicode_GET_LENGTH, 81 | PyUnicode_KIND, 82 | PyUnicode_Type, 83 | ) 84 | from athenian.api.native.mi_heap_destroy_stl_allocator cimport ( 85 | mi_heap_allocator_from_capsule, 86 | mi_heap_destroy_stl_allocator, 87 | mi_vector, 88 | ) 89 | from athenian.api.native.numpy cimport ( 90 | NPY_DATETIMEUNIT, 91 | NPY_FR_ns, 92 | NPY_FR_s, 93 | NPY_FR_us, 94 | PyArray_CheckExact, 95 | PyArray_DATA, 96 | PyArray_DIM, 97 | PyArray_IS_C_CONTIGUOUS, 98 | PyArray_NDIM, 99 | PyArray_ScalarAsCtype, 100 | PyDatetimeArrType_Type, 101 | PyDatetimeScalarObject, 102 | PyDoubleArrType_Type, 103 | PyFloatArrType_Type, 104 | PyIntegerArrType_Type, 105 | PyTimedeltaArrType_Type, 106 | ) 107 | from 
athenian.api.native.optional cimport optional 108 | from athenian.api.native.utf8 cimport ucs4_to_utf8_json 109 | 110 | import pickle 111 | from types import GenericAlias 112 | 113 | from athenian.api.typing_utils import is_generic, is_optional 114 | 115 | 116 | cdef extern from "stdio.h" nogil: 117 | FILE *fmemopen(void *buf, size_t size, const char *mode) 118 | 119 | 120 | cdef extern from "<stdlib.h>" nogil: 121 | char *gcvt(double number, int ndigit, char *buf) 122 | 123 | 124 | cdef extern from "web_model_io.h" nogil: 125 | void set_datetimestruct_days(npy_int64 days, int *year, int *month, int *day) 126 | 127 | 128 | import_datetime() 129 | import_array() 130 | 131 | 132 | cdef enum DataType: 133 | DT_INVALID = 0 134 | DT_MODEL = 1 135 | DT_LIST = 2 136 | DT_DICT = 3 137 | DT_LONG = 4 138 | DT_FLOAT = 5 139 | DT_STRING = 6 140 | DT_DT = 7 141 | DT_TD = 8 142 | DT_BOOL = 9 143 | DT_FREEFORM = 10 144 | 145 | 146 | cdef enum DataFlags: 147 | DF_KEY_UNMAPPED = 1 148 | DF_OPTIONAL = 2 149 | DF_VERBATIM = 4 150 | 151 | 152 | ctypedef struct SpecNode: 153 | DataType type 154 | uint8_t flags 155 | Py_ssize_t offset 156 | const void *key 157 | PyTypeObject *model 158 | optional[mi_vector[SpecNode]] nested 159 | 160 | 161 | cdef inline DataType _discover_data_type( 162 | PyTypeObject *obj, 163 | const char *key, 164 | PyTypeObject **deref, 165 | bint *optional, 166 | bint *verbatim, 167 | ) except DT_INVALID: 168 | if is_optional( obj): 169 | optional[0] = 1 170 | verbatim[0] = hasattr(( obj).__origin__, "__verbatim__") 171 | args = ( obj).__args__ 172 | obj = PyTuple_GET_ITEM( args, 0) 173 | if obj == &PyLong_Type: 174 | return DT_LONG 175 | elif obj == &PyFloat_Type: 176 | return DT_FLOAT 177 | elif obj == &PyUnicode_Type: 178 | return DT_STRING 179 | elif obj == PyDateTimeAPI.DateTimeType: 180 | return DT_DT 181 | elif obj == PyDateTimeAPI.DeltaType: 182 | return DT_TD 183 | elif obj == &PyBool_Type: 184 | return DT_BOOL 185 | elif is_generic( obj): 186 | origin = ( obj).__origin__ 187 | args = ( obj).__args__ 188 | deref[0] = obj 189 | if origin == &PyList_Type: 190 | return DT_LIST 191 | elif origin == &PyDict_Type: 192 | return DT_DICT 193 | else: 194 | return DT_INVALID 195 | elif hasattr( obj, "attribute_types"): 196 | deref[0] = obj 197 | return DT_MODEL 198 | elif obj == &PyDict_Type or obj == &PyBaseObject_Type: 199 | return DT_FREEFORM 200 | else: 201 | raise AssertionError(f"{'Optional f' if optional[0] else 'F'}ield `{PyUnicode_FromString(key)}` type is not supported: { obj}") 202 | 203 | 204 | cdef inline void _apply_data_type( 205 | Py_ssize_t offset, 206 | const char *key, 207 | PyTypeObject *member_type, 208 | SpecNode *fields, 209 | mi_heap_destroy_stl_allocator[char] &alloc, 210 | ) except *: 211 | cdef: 212 | PyTypeObject *deref = NULL 213 | bint optional = 0, verbatim = 0 214 | DataType dtype = _discover_data_type(member_type, key, &deref, &optional, &verbatim) 215 | SpecNode *back = &dereference(fields.nested).emplace_back() 216 | back.type = dtype 217 | back.offset = offset 218 | if optional: 219 | back.flags |= DF_OPTIONAL 220 | if verbatim: 221 | back.flags |= DF_VERBATIM 222 | back.nested.emplace(alloc) 223 | if deref != NULL: 224 | _discover_fields(deref, back, alloc) 225 | 226 | 227 | cdef void _discover_fields( 228 | PyTypeObject *model, 229 | SpecNode *fields, 230 | mi_heap_destroy_stl_allocator[char] &alloc, 231 | ) except *: 232 | cdef: 233 | object attribute_types 234 | object attribute_map 235 | PyTypeObject *member_type 236 | PyMemberDef *members 237 | PyObject *key 
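        # The DT_MODEL branch below walks the PyMemberDef table that CPython
        # generates for __slots__. For a hypothetical model (illustrative,
        # not from this repo) such as
        #
        #     class PullRequestModel(Model):
        #         attribute_types = {"number": int, "title": str}
        #         attribute_map = {"title": "custom_title"}
        #         __slots__ = ("_number", "_title")
        #
        # every members[i] entry carries the slot name and its byte offset in
        # the instance; members[i].name + 1 strips the leading underscore so
        # the name matches the attribute_types / attribute_map keys.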
238 | SpecNode *back 239 | 240 | if fields.type == DT_MODEL: 241 | attribute_types = ( model).attribute_types 242 | attribute_map = ( model).attribute_map 243 | fields.model = model 244 | members = model.tp_members 245 | i = 0 246 | while members[i].name != NULL: 247 | member_type = PyDict_GetItemString(attribute_types, members[i].name + 1) 248 | _apply_data_type(members[i].offset, members[i].name + 1, member_type, fields, alloc) 249 | back = &dereference(fields.nested).back() 250 | key = PyDict_GetItemString(attribute_map, members[i].name + 1) 251 | if key != NULL: 252 | back.flags &= ~DF_KEY_UNMAPPED 253 | back.key = key 254 | else: 255 | back.flags |= DF_KEY_UNMAPPED 256 | back.key = members[i].name + 1 257 | i += 1 258 | elif fields.type == DT_LIST: 259 | attribute_types = ( model).__args__ 260 | _apply_data_type( 261 | 0, 262 | b"list", 263 | PyTuple_GET_ITEM( attribute_types, 0), 264 | fields, 265 | alloc, 266 | ) 267 | elif fields.type == DT_DICT: 268 | attribute_types = ( model).__args__ 269 | _apply_data_type( 270 | 0, 271 | b"dict key", 272 | PyTuple_GET_ITEM( attribute_types, 0), 273 | fields, 274 | alloc, 275 | ) 276 | _apply_data_type( 277 | 0, 278 | b"dict value", 279 | PyTuple_GET_ITEM( attribute_types, 1), 280 | fields, 281 | alloc, 282 | ) 283 | else: 284 | raise AssertionError(f"Cannot recurse in dtype {fields.type}") 285 | 286 | 287 | @cython.cdivision(True) 288 | cdef PyObject *_write_object(PyObject *obj, SpecNode *spec, chunked_stream &stream) nogil: 289 | cdef: 290 | char dtype = spec.type, bool 291 | long val_long 292 | double val_double 293 | float val_float 294 | uint32_t str_length, val32, i 295 | uint16_t val16[4] 296 | int32_t vali32 297 | PyObject *exc 298 | bint is_unicode, is_float 299 | NPY_DATETIMEUNIT npy_unit 300 | npy_int64 obval 301 | Py_ssize_t dict_pos = 0 302 | PyObject *dict_key = NULL 303 | PyObject *dict_val = NULL 304 | PyObject **npdata 305 | SpecNode *field 306 | if obj == Py_None: 307 | dtype = 0 308 | stream.write( &dtype, 1) 309 | return NULL 310 | stream.write( &dtype, 1) 311 | if dtype == DT_LONG: 312 | if PyLong_CheckExact(obj): 313 | val_long = PyLong_AsLong(obj) 314 | elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type): 315 | val_long = 0 316 | PyArray_ScalarAsCtype(obj, &val_long) 317 | else: 318 | return obj 319 | stream.write( &val_long, sizeof(long)) 320 | elif dtype == DT_FLOAT: 321 | if PyFloat_CheckExact(obj): 322 | val_double = PyFloat_AS_DOUBLE(obj) 323 | elif PyLong_CheckExact(obj): 324 | val_double = PyLong_AsLong(obj) 325 | elif PyObject_TypeCheck(obj, &PyDoubleArrType_Type): 326 | PyArray_ScalarAsCtype(obj, &val_double) 327 | elif PyObject_TypeCheck(obj, &PyFloatArrType_Type): 328 | PyArray_ScalarAsCtype(obj, &val_float) 329 | val_double = val_float 330 | elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type): 331 | val_long = 0 332 | PyArray_ScalarAsCtype(obj, &val_long) 333 | val_double = val_long 334 | else: 335 | return obj 336 | stream.write( &val_double, sizeof(double)) 337 | elif dtype == DT_STRING: 338 | is_unicode = PyUnicode_Check(obj) 339 | if not is_unicode and not PyBytes_Check(obj): 340 | return obj 341 | if is_unicode: 342 | str_length = PyUnicode_GET_LENGTH(obj) 343 | val32 = str_length | ((PyUnicode_KIND(obj) - 1) << 30) 344 | stream.write( &val32, 4) 345 | # each code point in PyUnicode_DATA buffer has PyUnicode_KIND(obj) bytes 346 | stream.write( PyUnicode_DATA(obj), PyUnicode_KIND(obj) * str_length) 347 | else: 348 | val32 = PyBytes_GET_SIZE(obj) 349 | stream.write( &val32, 4) 350 | 
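            # bytes take the same wire shape as str: a 4-byte header whose
            # top two bits encode PyUnicode kind - 1 (zero here), followed by
            # the raw buffer; _read_model will surface it as a 1-byte-kind str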
stream.write(PyBytes_AS_STRING(obj), val32) 351 | elif dtype == DT_DT: 352 | if not PyDateTime_Check(obj): 353 | if PyObject_TypeCheck(obj, &PyDatetimeArrType_Type): 354 | npy_unit = ( obj).obmeta.base 355 | obval = ( obj).obval 356 | if npy_unit == NPY_FR_ns: 357 | obval //= 1000000000 358 | elif npy_unit == NPY_FR_us: 359 | obval //= 1000000 360 | elif npy_unit != NPY_FR_s: 361 | return obj 362 | memcpy(val16, &obval, 8) # little-endian 363 | else: 364 | return obj 365 | else: 366 | val16[0] = PyDateTime_GET_YEAR(obj) << 4 367 | val16[0] |= PyDateTime_GET_MONTH(obj) 368 | val16[1] = PyDateTime_GET_DAY(obj) << 7 369 | val16[1] |= PyDateTime_DATE_GET_HOUR(obj) 370 | val16[2] = (PyDateTime_DATE_GET_MINUTE(obj) << 8) | 0x8000 371 | val16[2] |= PyDateTime_DATE_GET_SECOND(obj) 372 | stream.write( val16, 2 * 3) 373 | elif dtype == DT_TD: 374 | if not PyDelta_Check(obj): 375 | if PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type): 376 | npy_unit = ( obj).obmeta.base 377 | obval = ( obj).obval 378 | if npy_unit == NPY_FR_ns: 379 | obval //= 1000000000 380 | elif npy_unit == NPY_FR_us: 381 | obval //= 1000000 382 | elif npy_unit != NPY_FR_s: 383 | return obj 384 | if obval >= 0: 385 | vali32 = obval // (24 * 3600) 386 | var_long = obval % (24 * 3600) 387 | else: 388 | vali32 = -1 -(obval // (24 * 3600)) 389 | var_long = 24 * 3600 + obval % (24 * 3600) 390 | else: 391 | return obj 392 | else: 393 | vali32 = PyDateTime_DELTA_GET_DAYS(obj) 394 | var_long = PyDateTime_DELTA_GET_SECONDS(obj) 395 | vali32 <<= 1 396 | if var_long >= 1 << 16: 397 | vali32 |= 1 398 | val16[0] = var_long & 0xFFFF 399 | stream.write( &vali32, 4) 400 | stream.write( val16, 2) 401 | elif dtype == DT_BOOL: 402 | bool = obj == Py_True 403 | if not bool and obj != Py_False: 404 | return obj 405 | stream.write( &bool, 1) 406 | elif dtype == DT_LIST: 407 | if not PyList_CheckExact(obj): 408 | if not PyArray_CheckExact(obj) or not PyArray_IS_C_CONTIGUOUS(obj) or PyArray_NDIM(obj) != 1: 409 | return obj 410 | val32 = PyArray_DIM(obj, 0) 411 | stream.write( &val32, 4) 412 | npdata = PyArray_DATA(obj) 413 | for i in range(val32): 414 | exc = _write_object(npdata[i], &dereference(spec.nested)[0], stream) 415 | if exc != NULL: 416 | return exc 417 | else: 418 | val32 = PyList_GET_SIZE(obj) 419 | stream.write( &val32, 4) 420 | for i in range(val32): 421 | exc = _write_object(PyList_GET_ITEM(obj, i), &dereference(spec.nested)[0], stream) 422 | if exc != NULL: 423 | return exc 424 | elif dtype == DT_DICT: 425 | if not PyDict_CheckExact(obj): 426 | return obj 427 | val32 = PyDict_Size(obj) 428 | stream.write( &val32, 4) 429 | while PyDict_Next(obj, &dict_pos, &dict_key, &dict_val): 430 | exc = _write_object(dict_key, &dereference(spec.nested)[0], stream) 431 | if exc != NULL: 432 | return exc 433 | exc = _write_object(dict_val, &dereference(spec.nested)[1], stream) 434 | if exc != NULL: 435 | return exc 436 | elif dtype == DT_MODEL: 437 | val32 = dereference(spec.nested).size() 438 | stream.write( &val32, 4) 439 | for i in range(val32): 440 | field = &dereference(spec.nested)[i] 441 | exc = _write_object( 442 | dereference((( obj) + field.offset)), 443 | field, 444 | stream, 445 | ) 446 | if exc != NULL: 447 | return exc 448 | else: 449 | return obj 450 | return NULL 451 | 452 | 453 | cdef void _serialize_list_of_models( 454 | list models, 455 | chunked_stream &stream, 456 | mi_heap_destroy_stl_allocator[char] &alloc, 457 | ) except *: 458 | cdef: 459 | uint32_t size 460 | SpecNode spec 461 | type item_type 462 | PyObject *exc 463 | 464 | 
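    # Wire format written below: a 4-byte length of the pickled
    # GenericAlias(list, (item_type,)) header, the pickle bytes themselves,
    # and then the recursively encoded objects. An empty input list writes a
    # lone zero length, which deserialize_models turns back into [].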
spec.type = DT_LIST 465 | spec.nested.emplace(alloc) 466 | if len(models) == 0: 467 | size = 0 468 | stream.write( &size, 4) 469 | return 470 | item_type = type(models[0]) 471 | result = pickle.dumps(GenericAlias(list, (item_type,))) 472 | _apply_data_type(0, b"root", item_type, &spec, alloc) 473 | with nogil: 474 | size = PyBytes_GET_SIZE( result) 475 | stream.write( &size, 4) 476 | stream.write(PyBytes_AS_STRING( result), size) 477 | exc = _write_object( models, &spec, stream) 478 | if exc != NULL: 479 | raise ValueError(f"Could not serialize `{ exc}` of type {type( exc)} in {item_type.__qualname__}") 480 | 481 | 482 | cdef void _serialize_generic(model, chunked_stream &stream) except *: 483 | cdef: 484 | bytes buf = pickle.dumps(model) 485 | uint32_t size = len(buf) 486 | stream.write( &size, 4) 487 | stream.write(PyBytes_AS_STRING( buf), size) 488 | 489 | 490 | def serialize_models(tuple models not None, alloc_capsule=None) -> bytes: 491 | cdef: 492 | optional[chunked_stream] stream 493 | bytes result 494 | char count 495 | optional[mi_heap_destroy_stl_allocator[char]] alloc 496 | size_t size 497 | assert len(models) < 255 498 | if alloc_capsule is not None: 499 | alloc.emplace(dereference(mi_heap_allocator_from_capsule(alloc_capsule))) 500 | else: 501 | alloc.emplace() 502 | stream.emplace(dereference(alloc)) 503 | count = len(models) 504 | dereference(stream).write(&count, 1) 505 | for model in models: 506 | if PyList_CheckExact( model): 507 | _serialize_list_of_models(model, dereference(stream), dereference(alloc)) 508 | else: 509 | _serialize_generic(model, dereference(stream)) 510 | size = dereference(stream).size() 511 | result = PyBytes_FromStringAndSize(NULL, size) 512 | dereference(stream).dump(PyBytes_AS_STRING( result), size) 513 | return result 514 | 515 | 516 | def deserialize_models(bytes buffer not None, alloc_capsule=None) -> tuple[list[object], ...]: 517 | cdef: 518 | char *input = PyBytes_AS_STRING( buffer) 519 | uint32_t aux = 0, tuple_pos 520 | str corrupted_msg = "Corrupted buffer at position %d: %s" 521 | FILE *stream 522 | tuple result 523 | long pos 524 | bytes type_buf 525 | object model_type 526 | SpecNode spec 527 | optional[mi_heap_destroy_stl_allocator[char]] alloc 528 | 529 | if alloc_capsule is not None: 530 | alloc.emplace(dereference(mi_heap_allocator_from_capsule(alloc_capsule))) 531 | else: 532 | alloc.emplace() 533 | stream = fmemopen(input, PyBytes_GET_SIZE( buffer), b"r") 534 | if fread(&aux, 1, 1, stream) != 1: 535 | raise ValueError(corrupted_msg % (ftell(stream), "tuple")) 536 | result = PyTuple_New(aux) 537 | for tuple_pos in range(aux): 538 | if fread(&aux, 4, 1, stream) != 1: 539 | raise ValueError(corrupted_msg % (ftell(stream), "pickle/header")) 540 | if aux == 0: 541 | model = [] 542 | else: 543 | pos = ftell(stream) 544 | if fseek(stream, aux, SEEK_CUR): 545 | raise ValueError(corrupted_msg % (ftell(stream), "pickle/body")) 546 | type_buf = PyBytes_FromStringAndSize(input + pos, aux) 547 | model_type = pickle.loads(type_buf) 548 | if not isinstance(model_type, (type, GenericAlias)): 549 | model = model_type 550 | else: 551 | spec.type = DT_LIST 552 | spec.nested.emplace(dereference(alloc)) 553 | _discover_fields( model_type, &spec, dereference(alloc)) 554 | model = _read_model(&spec, stream, input, corrupted_msg) 555 | Py_INCREF(model) 556 | PyTuple_SET_ITEM(result, tuple_pos, model) 557 | fclose(stream) 558 | return result 559 | 560 | 561 | cdef object _read_model(SpecNode *spec, FILE *stream, const char *raw, str corrupted_msg): 562 | 
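    # Mirrors _write_object: the first byte is the DataType tag, where 0
    # (DT_INVALID) is how the writer encodes None; any other tag must agree
    # with the spec discovered from the model class, otherwise the read fails
    # with a "Corrupted buffer" ValueError.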
cdef: 563 | char dtype, bool 564 | long val_long 565 | double val_double 566 | uint32_t aux32, i 567 | int32_t auxi32 568 | uint16_t aux16[4] 569 | unsigned int kind 570 | int year, month, day, hour, minute, second 571 | PyObject *utctz = ( PyDateTimeAPI).TimeZone_UTC 572 | PyObject *obj_val 573 | SpecNode *field 574 | 575 | if fread(&dtype, 1, 1, stream) != 1: 576 | raise ValueError(corrupted_msg % (ftell(stream), "dtype")) 577 | if dtype == DT_INVALID: 578 | return None 579 | if dtype != spec.type: 580 | raise ValueError(corrupted_msg % (ftell(stream), f"dtype {dtype} != {spec.type}")) 581 | if dtype == DT_LONG: 582 | if fread(&val_long, sizeof(long), 1, stream) != 1: 583 | raise ValueError(corrupted_msg % (ftell(stream), "long")) 584 | return PyLong_FromLong(val_long) 585 | elif dtype == DT_FLOAT: 586 | if fread(&val_double, sizeof(double), 1, stream) != 1: 587 | raise ValueError(corrupted_msg % (ftell(stream), "float")) 588 | return PyFloat_FromDouble(val_double) 589 | elif dtype == DT_STRING: 590 | if fread(&aux32, 4, 1, stream) != 1: 591 | raise ValueError(corrupted_msg % (ftell(stream), "str/header")) 592 | kind = (aux32 >> 30) + 1 593 | aux32 &= 0x3FFFFFFF 594 | val_long = ftell(stream) 595 | # move stream forward of the number of bytes we are about to read from raw 596 | if fseek(stream, aux32 * kind, SEEK_CUR): 597 | raise ValueError(corrupted_msg % (ftell(stream), "str/body")) 598 | return PyUnicode_FromKindAndData(kind, raw + val_long, aux32) 599 | elif dtype == DT_DT: 600 | if fread(aux16, 2, 3, stream) != 3: 601 | raise ValueError(corrupted_msg % (ftell(stream), "dt")) 602 | if aux16[2] & 0x8000: 603 | year = aux16[0] >> 4 604 | month = aux16[0] & 0xF 605 | day = aux16[1] >> 7 606 | hour = aux16[1] & 0x7F 607 | minute = (aux16[2] >> 8) & 0x7F 608 | second = aux16[2] & 0xFF 609 | else: 610 | aux16[3] = 0 611 | obj_val = PyDatetimeArrType_Type.tp_alloc(&PyDatetimeArrType_Type, 0) 612 | memcpy(&(obj_val).obval, aux16, 8) 613 | ( obj_val).obmeta.base = NPY_FR_s 614 | ( obj_val).obmeta.num = 1 615 | return obj_val 616 | return PyDateTimeAPI.DateTime_FromDateAndTime( 617 | year, month, day, hour, minute, second, 0, utctz, PyDateTimeAPI.DateTimeType, 618 | ) 619 | elif dtype == DT_TD: 620 | if fread(&auxi32, 4, 1, stream) != 1: 621 | raise ValueError(corrupted_msg % (ftell(stream), "td")) 622 | if fread(aux16, 2, 1, stream) != 1: 623 | raise ValueError(corrupted_msg % (ftell(stream), "td")) 624 | day = auxi32 >> 1 625 | second = aux16[0] + ((auxi32 & 1) << 16) 626 | return PyDateTimeAPI.Delta_FromDelta(day, second, 0, 1, PyDateTimeAPI.DeltaType) 627 | elif dtype == DT_BOOL: 628 | if fread(&bool, 1, 1, stream) != 1: 629 | raise ValueError(corrupted_msg % (ftell(stream), "bool")) 630 | if bool: 631 | return True 632 | return False 633 | elif dtype == DT_MODEL: 634 | obj = spec.model 635 | if fread(&aux32, 4, 1, stream) != 1: 636 | raise ValueError(corrupted_msg % (ftell(stream), "model")) 637 | if aux32 != dereference(spec.nested).size(): 638 | raise ValueError(corrupted_msg % (ftell(stream), f"{obj} has changed")) 639 | obj = obj.__new__(obj) 640 | for i in range(aux32): 641 | field = &dereference(spec.nested)[i] 642 | val = _read_model(field, stream, raw, corrupted_msg) 643 | Py_INCREF(val) 644 | ((( obj) + field.offset))[0] = val 645 | return obj 646 | elif dtype == DT_LIST: 647 | if fread(&aux32, 4, 1, stream) != 1: 648 | raise ValueError(corrupted_msg % (ftell(stream), "list")) 649 | obj = PyList_New(aux32) 650 | for i in range(aux32): 651 | val = 
_read_model(&dereference(spec.nested)[0], stream, raw, corrupted_msg) 652 | Py_INCREF(val) 653 | PyList_SET_ITEM(obj, i, val) 654 | return obj 655 | elif dtype == DT_DICT: 656 | if fread(&aux32, 4, 1, stream) != 1: 657 | raise ValueError(corrupted_msg % (ftell(stream), "dict")) 658 | obj = PyDict_New() 659 | for i in range(aux32): 660 | key = _read_model(&dereference(spec.nested)[0], stream, raw, corrupted_msg) 661 | val = _read_model(&dereference(spec.nested)[1], stream, raw, corrupted_msg) 662 | PyDict_SetItem(obj, key, val) 663 | return obj 664 | else: 665 | raise AssertionError(f"Unsupported dtype: {dtype}") 666 | 667 | 668 | def model_to_json(model, alloc_capsule=None) -> bytes: 669 | cdef: 670 | bytes result 671 | optional[mi_heap_destroy_stl_allocator[char]] alloc 672 | optional[chunked_stream] stream 673 | type root_type 674 | SpecNode spec 675 | PyObject *error 676 | 677 | if model is None: 678 | return b"null" 679 | if PyList_CheckExact( model): 680 | if PyList_GET_SIZE( model) == 0: 681 | return b"[]" 682 | spec.type = DT_LIST 683 | root_type = type(model[0]) 684 | else: 685 | spec.type = DT_MODEL 686 | root_type = type(model) 687 | 688 | if alloc_capsule is not None: 689 | alloc.emplace(dereference(mi_heap_allocator_from_capsule(alloc_capsule))) 690 | else: 691 | alloc.emplace() 692 | stream.emplace(dereference(alloc)) 693 | 694 | spec.nested.emplace(dereference(alloc)) 695 | _apply_data_type(0, b"root", root_type, &spec, dereference(alloc)) 696 | if spec.type == DT_MODEL: 697 | spec = dereference(spec.nested)[0] 698 | 699 | with nogil: 700 | error = _write_json( model, spec, dereference(stream)) 701 | if error != NULL: 702 | raise AssertionError( 703 | f"failed to serialize to JSON: { error} of type {type( error).__name__}" 704 | ) 705 | 706 | size = dereference(stream).size() 707 | result = PyBytes_FromStringAndSize(NULL, size) 708 | dereference(stream).dump(PyBytes_AS_STRING( result), size) 709 | return result 710 | 711 | 712 | cdef SpecNode fake_str_model 713 | fake_str_model.type = DT_STRING 714 | 715 | 716 | @cython.cdivision(True) 717 | cdef PyObject *_write_json(PyObject *obj, SpecNode &spec, chunked_stream &stream) nogil: 718 | cdef: 719 | PyObject *key = NULL 720 | PyObject *value = NULL 721 | PyObject *r 722 | Py_ssize_t pos = 0, size, i, j, item_len, char_len 723 | unsigned int kind 724 | char sym 725 | char *data 726 | int aux, auxdiv, rem, year, month, day 727 | long val_long, div 728 | npy_int64 obval 729 | double val_double 730 | float val_float 731 | char buffer[24] 732 | SpecNode *nested 733 | 734 | if obj == Py_None: 735 | stream.write(b"null", 4) 736 | return NULL 737 | if spec.type == DT_MODEL: 738 | if Py_TYPE(obj).tp_members == NULL: 739 | # this is just a check for __slots__, it's hard to validate better without GIL 740 | return obj 741 | stream.write(b"{", 1) 742 | kind = 0 743 | for i in range( dereference(spec.nested).size()): 744 | nested = &dereference(spec.nested)[i] 745 | value = dereference((( obj) + nested.offset)) 746 | if (nested.flags & DF_OPTIONAL) and not (nested.flags & DF_VERBATIM): 747 | if value == NULL or value == Py_None: 748 | continue 749 | if PyList_CheckExact(value) and PyList_GET_SIZE(value) == 0: 750 | continue 751 | if PyDict_CheckExact(value) and PyDict_Size(value) == 0: 752 | continue 753 | if PyArray_CheckExact(value) and PyArray_NDIM(value) == 1 and PyArray_DIM(value, 0) == 0: 754 | continue 755 | if nested.type == DT_FLOAT: 756 | if PyFloat_CheckExact(value): 757 | val_double = PyFloat_AS_DOUBLE(value) 758 | elif 
PyObject_TypeCheck(value, &PyDoubleArrType_Type): 759 | PyArray_ScalarAsCtype(value, &val_double) 760 | elif PyObject_TypeCheck(value, &PyFloatArrType_Type): 761 | PyArray_ScalarAsCtype(value, &val_float) 762 | val_double = val_float 763 | if val_double != val_double: 764 | continue 765 | if kind: 766 | stream.write(b",", 1) 767 | else: 768 | kind = 1 769 | if nested.flags & DF_KEY_UNMAPPED: 770 | stream.write(b'"', 1) 771 | stream.write( nested.key, strlen( nested.key)) 772 | stream.write(b'"', 1) 773 | else: 774 | r = _write_json( nested.key, fake_str_model, stream) 775 | if r != NULL: 776 | return r 777 | stream.write(b":", 1) 778 | r = _write_json(value, dereference(nested), stream) 779 | if r != NULL: 780 | return r 781 | stream.write(b"}", 1) 782 | elif spec.type == DT_DICT: 783 | stream.write(b"{", 1) 784 | if not PyDict_CheckExact(obj): 785 | return obj 786 | while PyDict_Next(obj, &pos, &key, &value): 787 | if pos != 1: 788 | stream.write(b",", 1) 789 | r = _write_json(key, dereference(spec.nested)[0], stream) 790 | if r != NULL: 791 | return r 792 | stream.write(b":", 1) 793 | r = _write_json(value, dereference(spec.nested)[1], stream) 794 | if r != NULL: 795 | return r 796 | stream.write(b"}", 1) 797 | elif spec.type == DT_LIST: 798 | stream.write(b"[", 1) 799 | nested = &dereference(spec.nested)[0] 800 | if not PyList_CheckExact(obj): 801 | if not PyArray_CheckExact(obj) or not PyArray_IS_C_CONTIGUOUS(obj) or PyArray_NDIM(obj) != 1: 802 | return obj 803 | npdata = PyArray_DATA(obj) 804 | for i in range(PyArray_DIM(obj, 0)): 805 | if i != 0: 806 | stream.write(b",", 1) 807 | r = _write_json(npdata[i], dereference(nested), stream) 808 | if r != NULL: 809 | return r 810 | else: 811 | for i in range(PyList_GET_SIZE(obj)): 812 | if i != 0: 813 | stream.write(b",", 1) 814 | r = _write_json(PyList_GET_ITEM(obj, i), dereference(nested), stream) 815 | if r != NULL: 816 | return r 817 | stream.write(b"]", 1) 818 | elif spec.type == DT_STRING: 819 | stream.write(b'"', 1) 820 | 821 | if PyUnicode_Check(obj): 822 | data = PyUnicode_DATA(obj) 823 | kind = PyUnicode_KIND(obj) 824 | item_len = PyUnicode_GET_LENGTH(obj) 825 | if kind == PyUnicode_1BYTE_KIND: 826 | for i in range(item_len): 827 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 828 | elif kind == PyUnicode_2BYTE_KIND: 829 | for i in range(item_len): 830 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 831 | elif kind == PyUnicode_4BYTE_KIND: 832 | for i in range(item_len): 833 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 834 | elif PyBytes_Check(obj): 835 | data = PyBytes_AS_STRING(obj) 836 | item_len = PyBytes_GET_SIZE(obj) 837 | for i in range(item_len): 838 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 839 | else: 840 | return obj 841 | 842 | stream.write(b'"', 1) 843 | elif spec.type == DT_DT: 844 | buffer[0] = buffer[21] = b'"' 845 | if not PyDateTime_Check(obj): 846 | if PyObject_TypeCheck(obj, &PyDatetimeArrType_Type): 847 | npy_unit = ( obj).obmeta.base 848 | obval = ( obj).obval 849 | if npy_unit == NPY_FR_ns: 850 | obval //= 1000000000 851 | elif npy_unit == NPY_FR_us: 852 | obval //= 1000000 853 | elif npy_unit != NPY_FR_s: 854 | return obj 855 | val_long = obval // (60 * 60 * 24) 856 | obval = obval - val_long * 60 * 60 * 24 857 | 858 | year = month = day = 0 859 | set_datetimestruct_days(val_long, &year, &month, &day) 860 | aux = year 861 | pos = 4 862 | while pos > 0: 863 | auxdiv = aux 864 | aux = aux // 10 865 | buffer[pos] = auxdiv - aux * 10 + ord(b"0") 
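                    # digits are emitted right-to-left into the fixed
                    # "YYYY-MM-DDTHH:MM:SSZ" template; buffer[0] and
                    # buffer[21] already hold the enclosing quotes, so the
                    # final write below emits exactly 22 bytes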
866 | pos -= 1 867 | buffer[5] = b"-" 868 | 869 | aux = month 870 | if aux < 10: 871 | buffer[6] = b"0" 872 | buffer[7] = ord(b"0") + aux 873 | else: 874 | buffer[6] = b"1" 875 | buffer[7] = ord(b"0") + aux - 10 876 | buffer[8] = b"-" 877 | 878 | aux = day 879 | auxdiv = aux // 10 880 | buffer[9] = ord(b"0") + auxdiv 881 | buffer[10] = ord(b"0") + aux - auxdiv * 10 882 | buffer[11] = b"T" 883 | 884 | auxdiv = obval // 60 885 | aux = obval - auxdiv * 60 886 | rem = auxdiv 887 | auxdiv = aux // 10 888 | buffer[18] = ord(b"0") + auxdiv 889 | buffer[19] = ord(b"0") + aux - auxdiv * 10 890 | buffer[20] = b"Z" 891 | 892 | auxdiv = rem // 60 893 | aux = rem - auxdiv * 60 894 | rem = auxdiv 895 | auxdiv = aux // 10 896 | buffer[15] = ord(b"0") + auxdiv 897 | buffer[16] = ord(b"0") + aux - auxdiv * 10 898 | buffer[17] = b":" 899 | 900 | aux = rem 901 | auxdiv = aux // 10 902 | buffer[12] = ord(b"0") + auxdiv 903 | buffer[13] = ord(b"0") + aux - auxdiv * 10 904 | buffer[14] = b":" 905 | else: 906 | return obj 907 | else: 908 | if ( PyDateTimeAPI).TimeZone_UTC != PyDateTime_DATE_GET_TZINFO(obj): 909 | return obj 910 | aux = PyDateTime_GET_YEAR(obj) 911 | pos = 4 912 | while pos > 0: 913 | auxdiv = aux 914 | aux = aux // 10 915 | buffer[pos] = auxdiv - aux * 10 + ord(b"0") 916 | pos -= 1 917 | buffer[5] = b"-" 918 | aux = PyDateTime_GET_MONTH(obj) 919 | if aux < 10: 920 | buffer[6] = b"0" 921 | buffer[7] = ord(b"0") + aux 922 | else: 923 | buffer[6] = b"1" 924 | buffer[7] = ord(b"0") + aux - 10 925 | buffer[8] = b"-" 926 | aux = PyDateTime_GET_DAY(obj) 927 | auxdiv = aux // 10 928 | buffer[9] = ord(b"0") + auxdiv 929 | buffer[10] = ord(b"0") + aux - auxdiv * 10 930 | buffer[11] = b"T" 931 | aux = PyDateTime_DATE_GET_HOUR(obj) 932 | auxdiv = aux // 10 933 | buffer[12] = ord(b"0") + auxdiv 934 | buffer[13] = ord(b"0") + aux - auxdiv * 10 935 | buffer[14] = b":" 936 | aux = PyDateTime_DATE_GET_MINUTE(obj) 937 | auxdiv = aux // 10 938 | buffer[15] = ord(b"0") + auxdiv 939 | buffer[16] = ord(b"0") + aux - auxdiv * 10 940 | buffer[17] = b":" 941 | aux = PyDateTime_DATE_GET_SECOND(obj) 942 | auxdiv = aux // 10 943 | buffer[18] = ord(b"0") + auxdiv 944 | buffer[19] = ord(b"0") + aux - auxdiv * 10 945 | buffer[20] = b"Z" 946 | stream.write(buffer, 22) 947 | elif spec.type == DT_TD: 948 | stream.write(b'"', 1) 949 | if not PyDelta_Check(obj): 950 | if PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type): 951 | npy_unit = ( obj).obmeta.base 952 | val_long = ( obj).obval 953 | if npy_unit == NPY_FR_ns: 954 | val_long //= 1000000000 955 | elif npy_unit == NPY_FR_us: 956 | val_long //= 1000000 957 | elif npy_unit != NPY_FR_s: 958 | return obj 959 | else: 960 | return obj 961 | else: 962 | val_long = PyDateTime_DELTA_GET_DAYS(obj) 963 | val_long *= 24 * 3600 964 | val_long += PyDateTime_DELTA_GET_SECONDS(obj) 965 | if val_long < 0: 966 | stream.write(b"-", 1) 967 | val_long = -val_long 968 | if val_long == 0: 969 | stream.write(b"0", 1) 970 | else: 971 | pos = 0 972 | while val_long: 973 | div = val_long 974 | val_long = val_long // 10 975 | buffer[pos] = div - val_long * 10 + ord(b"0") 976 | pos += 1 977 | for i in range(pos // 2): 978 | sym = buffer[i] 979 | div = pos - i - 1 980 | buffer[i] = buffer[div] 981 | buffer[div] = sym 982 | stream.write(buffer, pos) 983 | stream.write(b's"', 2) 984 | elif spec.type == DT_LONG: 985 | if PyLong_CheckExact(obj): 986 | val_long = PyLong_AsLong(obj) 987 | elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type): 988 | val_long = 0 989 | PyArray_ScalarAsCtype(obj, &val_long) 990 | 
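        # the decimal rendering below avoids CPython APIs because _write_json
        # runs nogil: digits are generated in reverse into `buffer`, then
        # swapped in place before being streamed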
else: 991 | return obj 992 | if val_long < 0: 993 | stream.write(b"-", 1) 994 | val_long = -val_long 995 | if val_long == 0: 996 | stream.write(b"0", 1) 997 | else: 998 | pos = 0 999 | while val_long: 1000 | div = val_long 1001 | val_long = val_long // 10 1002 | buffer[pos] = div - val_long * 10 + ord(b"0") 1003 | pos += 1 1004 | for i in range(pos // 2): 1005 | sym = buffer[i] 1006 | div = pos - i - 1 1007 | buffer[i] = buffer[div] 1008 | buffer[div] = sym 1009 | stream.write(buffer, pos) 1010 | elif spec.type == DT_BOOL: 1011 | if obj == Py_True: 1012 | stream.write(b"true", 4) 1013 | elif obj == Py_False: 1014 | stream.write(b"false", 5) 1015 | else: 1016 | return obj 1017 | elif spec.type == DT_FLOAT: 1018 | if PyFloat_CheckExact(obj): 1019 | val_double = PyFloat_AS_DOUBLE(obj) 1020 | elif PyLong_CheckExact(obj): 1021 | val_double = PyLong_AsLong(obj) 1022 | elif PyObject_TypeCheck(obj, &PyDoubleArrType_Type): 1023 | PyArray_ScalarAsCtype(obj, &val_double) 1024 | elif PyObject_TypeCheck(obj, &PyFloatArrType_Type): 1025 | PyArray_ScalarAsCtype(obj, &val_float) 1026 | val_double = val_float 1027 | elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type): 1028 | val_long = 0 1029 | PyArray_ScalarAsCtype(obj, &val_long) 1030 | val_double = val_long 1031 | else: 1032 | return obj 1033 | gcvt(val_double, 24, buffer) 1034 | stream.write(buffer, strlen(buffer)) 1035 | elif spec.type == DT_FREEFORM: 1036 | r = _write_freeform_json(obj, stream) 1037 | if r != NULL: 1038 | return r 1039 | else: 1040 | return obj 1041 | return NULL 1042 | 1043 | 1044 | @cython.cdivision(True) 1045 | cdef PyObject *_write_freeform_json(PyObject *node, chunked_stream &stream) nogil: 1046 | cdef: 1047 | PyObject *key = NULL 1048 | PyObject *value = NULL 1049 | PyObject *r 1050 | Py_ssize_t pos = 0, size = 0, i, j, item_len, char_len 1051 | char *data 1052 | unsigned int kind 1053 | char sym 1054 | long val_long, div 1055 | double float_val 1056 | char buffer[24] 1057 | 1058 | if PyDict_CheckExact(node): 1059 | stream.write(b"{", 1) 1060 | while PyDict_Next(node, &pos, &key, &value): 1061 | if pos != 1: 1062 | stream.write(b",", 1) 1063 | r = _write_freeform_json(key, stream) 1064 | if r != NULL: 1065 | return r 1066 | stream.write(b":", 1) 1067 | r = _write_freeform_json(value, stream) 1068 | if r != NULL: 1069 | return r 1070 | stream.write(b"}", 1) 1071 | elif PyList_CheckExact(node): 1072 | stream.write(b"[", 1) 1073 | for i in range(PyList_GET_SIZE(node)): 1074 | if i != 0: 1075 | stream.write(b",", 1) 1076 | r = _write_freeform_json(PyList_GET_ITEM(node, i), stream) 1077 | if r != NULL: 1078 | return r 1079 | stream.write(b"]", 1) 1080 | elif PyUnicode_Check(node): 1081 | stream.write(b'"', 1) 1082 | 1083 | data = PyUnicode_DATA(node) 1084 | kind = PyUnicode_KIND(node) 1085 | item_len = PyUnicode_GET_LENGTH(node) 1086 | if kind == PyUnicode_1BYTE_KIND: 1087 | for i in range(item_len): 1088 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 1089 | elif kind == PyUnicode_2BYTE_KIND: 1090 | for i in range(item_len): 1091 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 1092 | elif kind == PyUnicode_4BYTE_KIND: 1093 | for i in range(item_len): 1094 | stream.write(buffer, ucs4_to_utf8_json(( data)[i], buffer)) 1095 | 1096 | stream.write(b'"', 1) 1097 | elif PyLong_CheckExact(node): 1098 | val_long = PyLong_AsLong(node) 1099 | if val_long < 0: 1100 | stream.write(b"-", 1) 1101 | val_long = -val_long 1102 | if val_long == 0: 1103 | stream.write(b"0", 1) 1104 | else: 1105 | pos = 0 1106 | while 
val_long: 1107 | div = val_long 1108 | val_long = val_long // 10 1109 | buffer[pos] = div - val_long * 10 + ord(b"0") 1110 | pos += 1 1111 | for i in range(pos // 2): 1112 | sym = buffer[i] 1113 | div = pos - i - 1 1114 | buffer[i] = buffer[div] 1115 | buffer[div] = sym 1116 | stream.write(buffer, pos) 1117 | elif node == Py_True: 1118 | stream.write(b"true", 4) 1119 | elif node == Py_False: 1120 | stream.write(b"false", 5) 1121 | elif PyFloat_CheckExact(node): 1122 | gcvt(PyFloat_AS_DOUBLE(node), 24, buffer) 1123 | stream.write(buffer, strlen(buffer)) 1124 | elif node == Py_None: 1125 | stream.write(b"null", 4) 1126 | else: 1127 | return node 1128 | return NULL 1129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Athenian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Athenian API - open 2 | 3 | Open source bits of [athenianco/athenian-api](https://github.com/athenianco/athenian-api). 4 | 5 | License: MIT. 
6 | -------------------------------------------------------------------------------- /async_utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from datetime import datetime, timezone 3 | import logging 4 | import textwrap 5 | from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, Union 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from pandas.core.dtypes.cast import OutOfBoundsDatetime, tslib 10 | from pandas.core.internals import make_block 11 | from pandas.core.internals.managers import BlockManager, form_blocks 12 | import sentry_sdk 13 | from sqlalchemy import Boolean, Column, DateTime, Integer 14 | from sqlalchemy.orm.attributes import InstrumentedAttribute 15 | from sqlalchemy.sql import ClauseElement 16 | from sqlalchemy.sql.elements import Label 17 | 18 | from athenian.api import metadata 19 | from athenian.api.db import Database, DatabaseLike 20 | from athenian.api.models.metadata.github import Base as MetadataBase 21 | from athenian.api.models.persistentdata.models import Base as PerdataBase 22 | from athenian.api.models.precomputed.models import GitHubBase as PrecomputedBase 23 | from athenian.api.models.state.models import Base as StateBase 24 | from athenian.api.to_object_arrays import to_object_arrays_split 25 | from athenian.api.tracing import MAX_SENTRY_STRING_LENGTH 26 | 27 | 28 | async def read_sql_query(sql: ClauseElement, 29 | con: DatabaseLike, 30 | columns: Union[Sequence[str], Sequence[InstrumentedAttribute], 31 | MetadataBase, PerdataBase, PrecomputedBase, StateBase], 32 | index: Optional[Union[str, Sequence[str]]] = None, 33 | soft_limit: Optional[int] = None, 34 | ) -> pd.DataFrame: 35 | """Read SQL query into a DataFrame. 36 | 37 | Returns a DataFrame corresponding to the result set of the query. 38 | Optionally provide the `index` parameter to use one of the 39 | columns as the index; otherwise, a default integer index will be used. 40 | 41 | Parameters 42 | ---------- 43 | sql : SQLAlchemy query object to be executed. 44 | con : async SQLAlchemy database engine. 45 | columns : list of the resulting column names, column objects, or the model if SELECT * 46 | index : Name(s) of the index column(s). 47 | soft_limit 48 | : Load at most this number of rows. 49 | 50 | Returns 51 | ------- 52 | DataFrame 53 | 54 | Notes 55 | ----- 56 | Any fetched datetime values with time zone information 57 | will be converted to UTC.
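
    Example (hypothetical: `select`, `PullRequest`, and `mdb` are stand-ins,
    not names defined in this module)::

        df = await read_sql_query(
            select(PullRequest), mdb, PullRequest, index="node_id",
        )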
58 | """ 59 | try: 60 | data = await con.fetch_all(query=sql) 61 | except Exception as e: 62 | try: 63 | sql = str(sql) 64 | except Exception: 65 | sql = repr(sql) 66 | sql = textwrap.shorten(sql, MAX_SENTRY_STRING_LENGTH - 500) 67 | logging.getLogger("%s.read_sql_query" % metadata.__package__).error( 68 | "%s: %s; %s", type(e).__name__, e, sql) 69 | raise e from None 70 | if soft_limit is not None and len(data) > soft_limit: 71 | data = data[:soft_limit] 72 | return wrap_sql_query(data, columns, index) 73 | 74 | 75 | def _create_block_manager_from_arrays( 76 | arrays_typed: List[np.ndarray], 77 | arrays_obj: np.ndarray, 78 | names_typed: List[str], 79 | names_obj: List[str], 80 | size: int, 81 | ) -> BlockManager: 82 | assert len(arrays_typed) == len(names_typed) 83 | assert len(arrays_obj) == len(names_obj) 84 | range_index = pd.RangeIndex(stop=size) 85 | typed_index = pd.Index(names_typed) 86 | blocks = form_blocks(arrays_typed, typed_index, [typed_index, range_index]) 87 | blocks.append(make_block(arrays_obj, placement=np.arange(len(arrays_obj)) + len(arrays_typed))) 88 | return BlockManager(blocks, [pd.Index(names_typed + names_obj), range_index]) 89 | 90 | 91 | def wrap_sql_query(data: List[Sequence[Any]], 92 | columns: Union[Sequence[str], Sequence[InstrumentedAttribute], 93 | MetadataBase, StateBase], 94 | index: Optional[Union[str, Sequence[str]]] = None, 95 | ) -> pd.DataFrame: 96 | """Turn the fetched DB records to a pandas DataFrame.""" 97 | try: 98 | columns[0] 99 | except TypeError: 100 | dt_columns = _extract_datetime_columns(columns.__table__.columns) 101 | int_columns = _extract_integer_columns(columns.__table__.columns) 102 | bool_columns = _extract_boolean_columns(columns.__table__.columns) 103 | columns = [c.name for c in columns.__table__.columns] 104 | else: 105 | dt_columns = _extract_datetime_columns(columns) 106 | int_columns = _extract_integer_columns(columns) 107 | bool_columns = _extract_boolean_columns(columns) 108 | columns = [(c.name if not isinstance(c, str) else c) for c in columns] 109 | typed_cols_indexes = [] 110 | typed_cols_names = [] 111 | obj_cols_indexes = [] 112 | obj_cols_names = [] 113 | for i, column in enumerate(columns): 114 | if column in dt_columns or column in int_columns or column in bool_columns: 115 | cols_indexes = typed_cols_indexes 116 | cols_names = typed_cols_names 117 | else: 118 | cols_indexes = obj_cols_indexes 119 | cols_names = obj_cols_names 120 | cols_indexes.append(i) 121 | cols_names.append(column) 122 | log = logging.getLogger(f"{metadata.__package__}.wrap_sql_query") 123 | # we used to have pd.DataFrame.from_records + bunch of convert_*() in relevant columns 124 | # the current approach is faster for several reasons: 125 | # 1. avoid an expensive copy of the object dtype columns in the BlockManager construction 126 | # 2. call tslib.array_to_datetime directly without surrounding Pandas bloat 127 | # 3. 
convert to int in the numpy domain and thus do not have to mess with indexes 128 | # 129 | # an ideal conversion would be loading columns directly from asyncpg but that requires 130 | # quite some changes in their internals 131 | with sentry_sdk.start_span(op="wrap_sql_query/convert", description=str(size := len(data))): 132 | data_typed, data_obj = to_object_arrays_split(data, typed_cols_indexes, obj_cols_indexes) 133 | converted_typed = [] 134 | discard_mask = None 135 | for column, values in zip(typed_cols_names, data_typed): 136 | if column in dt_columns: 137 | converted_typed.append(_convert_datetime(values)) 138 | elif column in int_columns: 139 | values, discarded = _convert_integer(values, column, int_columns[column], log) 140 | converted_typed.append(values) 141 | if discarded is not None: 142 | if discard_mask is None: 143 | discard_mask = np.zeros(len(data), dtype=bool) 144 | discard_mask[discarded] = True 145 | elif column in bool_columns: 146 | converted_typed.append(values.astype(bool)) 147 | else: 148 | raise AssertionError("impossible: typed columns are either dt or int") 149 | if discard_mask is not None: 150 | left = ~discard_mask 151 | size = left.sum() 152 | converted_typed = [arr[left] for arr in converted_typed] 153 | data_obj = data_obj[:, left] 154 | with sentry_sdk.start_span(op="wrap_sql_query/pd.DataFrame()", description=str(size)): 155 | block_mgr = _create_block_manager_from_arrays( 156 | converted_typed, data_obj, typed_cols_names, obj_cols_names, size) 157 | frame = pd.DataFrame(block_mgr, columns=typed_cols_names + obj_cols_names, copy=False) 158 | for column in dt_columns: 159 | try: 160 | frame[column] = frame[column].dt.tz_localize(timezone.utc) 161 | except (AttributeError, TypeError): 162 | continue 163 | if index is not None: 164 | frame.set_index(index, inplace=True) 165 | return frame 166 | 167 | 168 | def _extract_datetime_columns(columns: Iterable[Union[Column, str]]) -> Set[str]: 169 | return { 170 | c.name for c in columns 171 | if not isinstance(c, str) and ( 172 | isinstance(c.type, DateTime) or 173 | (isinstance(c.type, type) and issubclass(c.type, DateTime)) 174 | ) 175 | } 176 | 177 | 178 | def _extract_boolean_columns(columns: Iterable[Union[Column, str]]) -> Set[str]: 179 | return { 180 | c.name for c in columns 181 | if not isinstance(c, str) and ( 182 | isinstance(c.type, Boolean) or 183 | (isinstance(c.type, type) and issubclass(c.type, Boolean)) 184 | ) 185 | } 186 | 187 | 188 | def _extract_integer_columns(columns: Iterable[Union[Column, str]], 189 | ) -> Dict[str, bool]: 190 | return { 191 | c.name: getattr( 192 | c, "info", {} if not isinstance(c, Label) else getattr(c.element, "info", {}), 193 | ).get("erase_nulls", False) 194 | for c in columns 195 | if not isinstance(c, str) and ( 196 | isinstance(c.type, Integer) or 197 | (isinstance(c.type, type) and issubclass(c.type, Integer)) 198 | ) 199 | and not getattr(c, "nullable", False) 200 | and (not isinstance(c, Label) or ( 201 | (not getattr(c.element, "nullable", False)) 202 | and (not getattr(c, "nullable", False)) 203 | )) 204 | } 205 | 206 | 207 | def _convert_datetime(arr: np.ndarray) -> np.ndarray: 208 | # None converts to NaT 209 | try: 210 | ts, offset = tslib.array_to_datetime(arr, utc=True, errors="raise") 211 | assert offset is None 212 | except OutOfBoundsDatetime: 213 | # TODO(vmarkovtsev): copy the function and set OOB values to NaT 214 | # this comparison is very slow but still faster than removing tzinfo and taking np.array() 215 | arr[arr == datetime(1, 1, 1)] = 
None 216 | arr[arr == datetime(1, 1, 1, tzinfo=timezone.utc)] = None 217 | try: 218 | return _convert_datetime(arr) 219 | except OutOfBoundsDatetime as e: 220 | raise e from None 221 | # 0 converts to 1970-01-01T00:00:00 222 | ts[ts == np.zeros(1, ts.dtype)[0]] = None 223 | return ts 224 | 225 | 226 | def postprocess_datetime(frame: pd.DataFrame, 227 | columns: Optional[Iterable[str]] = None, 228 | ) -> pd.DataFrame: 229 | """Ensure *inplace* that all the timestamps inside the dataframe are valid UTC or NaT. 230 | 231 | :return: Fixed dataframe - the same instance as `frame`. 232 | """ 233 | utc_dt1 = datetime(1, 1, 1, tzinfo=timezone.utc) 234 | dt1 = datetime(1, 1, 1) 235 | if columns is not None: 236 | obj_cols = dt_cols = columns 237 | else: 238 | obj_cols = frame.select_dtypes(include=[object]) 239 | dt_cols = frame.select_dtypes(include=["datetime"]) 240 | for col in obj_cols: 241 | fc = frame[col] 242 | if utc_dt1 in fc: 243 | fc.replace(utc_dt1, pd.NaT, inplace=True) 244 | if dt1 in fc: 245 | fc.replace(dt1, pd.NaT, inplace=True) 246 | for col in dt_cols: 247 | fc = frame[col] 248 | if 0 in fc: 249 | fc.replace(0, pd.NaT, inplace=True) 250 | try: 251 | frame[col] = fc.dt.tz_localize(timezone.utc) 252 | except (AttributeError, TypeError): 253 | continue 254 | return frame 255 | 256 | 257 | def _convert_integer(arr: np.ndarray, 258 | name: str, 259 | erase_null: bool, 260 | log: logging.Logger, 261 | ) -> Tuple[np.ndarray, Optional[np.ndarray]]: 262 | nulls = None 263 | while True: 264 | try: 265 | return arr.astype(int), nulls 266 | except TypeError as e: 267 | nulls = np.equal(arr, None) 268 | if not nulls.any() or not erase_null: 269 | raise ValueError(f"Column {name} is not all-integer") from e 270 | log.error("fetched nulls instead of integers in %s", name) 271 | arr[nulls] = 0 272 | 273 | 274 | def postprocess_integer(frame: pd.DataFrame, columns: Iterable[Tuple[str, int]]) -> pd.DataFrame: 275 | """Ensure *inplace* that all the integers inside the dataframe are not objects. 276 | 277 | :return: Fixed dataframe, a potentially different instance. 278 | """ 279 | dirty_index = False 280 | log = None 281 | for col, erase_null in columns: 282 | while True: 283 | try: 284 | frame[col] = frame[col].astype(int, copy=False) 285 | break 286 | except TypeError as e: 287 | nulls = frame[col].isnull().values 288 | if not nulls.any(): 289 | raise ValueError(f"Column {col} is not all-integer") from e 290 | if not erase_null: 291 | raise ValueError(f"Column {col} is not all-integer\n" 292 | f"{frame.loc[nulls].to_dict('records')}") from e 293 | if log is None: 294 | log = logging.getLogger(f"{metadata.__package__}.read_sql_query") 295 | log.error("fetched nulls instead of integers in %s: %s", 296 | col, frame.loc[nulls].to_dict("records")) 297 | frame = frame.take(np.flatnonzero(~nulls)) 298 | dirty_index = True 299 | if dirty_index: 300 | frame.reset_index(drop=True, inplace=True) 301 | return frame 302 | 303 | 304 | async def gather(*coros_or_futures, 305 | op: Optional[str] = None, 306 | description: Optional[str] = None, 307 | catch: Type[BaseException] = Exception, 308 | ) -> Tuple[Any, ...]: 309 | """Return a future aggregating results/exceptions from the given coroutines/futures. 310 | 311 | This is equivalent to `asyncio.gather(*coros_or_futures, return_exceptions=True)` with 312 | subsequent exception forwarding. 313 | 314 | :param op: Wrap the execution in a Sentry span with this `op`. 315 | :param description: Sentry span description. 
316 | :param catch: Forward exceptions of this type. 317 | """ 318 | async def body(): 319 | if len(coros_or_futures) == 0: 320 | return tuple() 321 | if len(coros_or_futures) == 1: 322 | return (await coros_or_futures[0],) 323 | results = await asyncio.gather(*coros_or_futures, return_exceptions=True) 324 | for r in results: 325 | if isinstance(r, catch): 326 | raise r from None 327 | return results 328 | 329 | if op is not None: 330 | with sentry_sdk.start_span(op=op, description=description): 331 | return await body() 332 | return await body() 333 | 334 | 335 | async def read_sql_query_with_join_collapse( 336 | query: ClauseElement, 337 | db: Database, 338 | columns: Union[Sequence[str], Sequence[InstrumentedAttribute], 339 | MetadataBase, PerdataBase, PrecomputedBase, StateBase], 340 | index: Optional[Union[str, Sequence[str]]] = None, 341 | soft_limit: Optional[int] = None, 342 | ) -> pd.DataFrame: 343 | """Enforce the predefined JOIN order in read_sql_query().""" 344 | query = query.with_statement_hint("Set(join_collapse_limit 1)") 345 | return await read_sql_query(query, db, columns=columns, index=index, soft_limit=soft_limit) 346 | 347 | 348 | # Allow other coroutines to execute every Nth iteration in long loops 349 | COROUTINE_YIELD_EVERY_ITER = 250 350 | 351 | 352 | async def list_with_yield(iterable: Iterable[Any], sentry_op: str) -> List[Any]: 353 | """Drain an iterable to a list, tracing the loop in Sentry and respecting other coroutines.""" 354 | with sentry_sdk.start_span(op=sentry_op) as span: 355 | things = [] 356 | for i, thing in enumerate(iterable): 357 | if (i + 1) % COROUTINE_YIELD_EVERY_ITER == 0: 358 | await asyncio.sleep(0) 359 | things.append(thing) 360 | try: 361 | span.description = str(i) 362 | except UnboundLocalError: 363 | pass 364 | return things 365 | -------------------------------------------------------------------------------- /asyncpg_recordobj.h: -------------------------------------------------------------------------------- 1 | // no need to #include anything, this file is used internally by to_object_arrays.pyx 2 | 3 | typedef struct { 4 | PyObject_VAR_HEAD 5 | 6 | // asyncpg specifics begin here 7 | // if they add another field, we will break spectacularly 8 | Py_hash_t self_hash; 9 | PyObject *desc; // we don't care of the actual type 10 | PyObject *ob_item[1]; // embedded in the tail, the count matches len() 11 | } ApgRecordObject; 12 | 13 | #define ApgRecord_GET_ITEM(op, i) (((ApgRecordObject *)(op))->ob_item[i]) 14 | #define ApgRecord_SET_ITEM(op, i, v) (((ApgRecordObject *)(op))->ob_item[i] = v) 15 | #define ApgRecord_GET_DESC(op) (((ApgRecordObject *)(op))->desc) 16 | -------------------------------------------------------------------------------- /to_object_arrays.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, nonecheck=False, optimize.unpack_method_calls=True 2 | # cython: warn.maybe_uninitialized=True 3 | # distutils: language = c++ 4 | 5 | from typing import Any, List, Sequence, Tuple 6 | 7 | import asyncpg 8 | import numpy as np 9 | 10 | cimport cython 11 | from cpython cimport PyObject 12 | from numpy cimport ndarray 13 | 14 | 15 | cdef extern from "asyncpg_recordobj.h": 16 | PyObject *ApgRecord_GET_ITEM(PyObject *, int) 17 | 18 | 19 | cdef extern from "Python.h": 20 | # added nogil -> from cpython cimport ... 
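# (the stock `from cpython cimport ...` declarations lack `nogil`; they are
# redeclared here straight from "Python.h" so that they can be called without
# holding the GIL)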
21 | # these are the macros that read directly from the internal ob_items 22 | PyObject *PyList_GET_ITEM(PyObject *, Py_ssize_t) nogil 23 | PyObject *PyTuple_GET_ITEM(PyObject *, Py_ssize_t) nogil 24 | 25 | 26 | @cython.boundscheck(False) 27 | def to_object_arrays_split(rows: List[Sequence[Any]], 28 | typed_indexes: Sequence[int], 29 | obj_indexes: Sequence[int], 30 | ) -> Tuple[np.ndarray, np.ndarray]: 31 | """ 32 | Convert a list of tuples into two object arrays. Any subclass of 33 | tuple in `rows` will be cast to tuple. 34 | 35 | Parameters 36 | ---------- 37 | rows : 2-d array (N, K) 38 | List of tuples to be converted into an array. Each tuple must be of equal length, 39 | otherwise, the results are undefined. 40 | typed_indexes : array of integers 41 | Sequence of integer indexes in each tuple in `rows` that select the first result. 42 | obj_indexes : array of integers 43 | Sequence of integer indexes in each tuple in `rows` that select the second result. 44 | 45 | Returns 46 | ------- 47 | (np.ndarray[object, ndim=2], np.ndarray[object, ndim=2]) 48 | The first array is the concatenation of columns in `rows` chosen by `typed_indexes`. 49 | The second array is the concatenation of columns in `rows` chosen by `obj_indexes`. 50 | """ 51 | cdef: 52 | Py_ssize_t i, j, size, cols_typed, cols_obj 53 | ndarray[object, ndim=2] result_typed 54 | ndarray[object, ndim=2] result_obj 55 | PyObject *record 56 | long[:] typed_indexes_arr 57 | long[:] obj_indexes_arr 58 | 59 | assert isinstance(rows, list) 60 | typed_indexes_arr = np.asarray(typed_indexes, dtype=int) 61 | obj_indexes_arr = np.asarray(obj_indexes, dtype=int) 62 | size = len(rows) 63 | cols_typed = len(typed_indexes_arr) 64 | cols_obj = len(obj_indexes_arr) 65 | 66 | result_typed = np.empty((cols_typed, size), dtype=object) 67 | result_obj = np.empty((cols_obj, size), dtype=object) 68 | if size == 0: 69 | return result_typed, result_obj 70 | 71 | if isinstance(rows[0], asyncpg.Record): 72 | for i in range(size): 73 | record = PyList_GET_ITEM(rows, i) 74 | for j in range(cols_typed): 75 | result_typed[j, i] = ApgRecord_GET_ITEM(record, typed_indexes_arr[j]) 76 | for j in range(cols_obj): 77 | result_obj[j, i] = ApgRecord_GET_ITEM(record, obj_indexes_arr[j]) 78 | elif isinstance(rows[0], tuple): 79 | for i in range(size): 80 | record = PyList_GET_ITEM(rows, i) 81 | for j in range(cols_typed): 82 | result_typed[j, i] = PyTuple_GET_ITEM(record, typed_indexes_arr[j]) 83 | for j in range(cols_obj): 84 | result_obj[j, i] = PyTuple_GET_ITEM(record, obj_indexes_arr[j]) 85 | else: 86 | # convert to tuple 87 | for i in range(size): 88 | row = tuple(rows[i]) 89 | for j in range(cols_typed): 90 | result_typed[j, i] = row[typed_indexes_arr[j]] 91 | for j in range(cols_obj): 92 | result_obj[j, i] = row[obj_indexes_arr[j]] 93 | 94 | return result_typed, result_obj -------------------------------------------------------------------------------- /typing_utils.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from contextvars import ContextVar 3 | import dataclasses 4 | from datetime import datetime, timedelta 5 | from itertools import chain 6 | from typing import Any, Callable, Dict, Iterable, Iterator, List, Mapping, NamedTuple, \ 7 | Optional, Tuple, Type, TypeVar, Union 8 | 9 | import numpy as np 10 | import pandas as pd 11 | from pandas._libs import tslib 12 | import sentry_sdk 13 | import xxhash 14 | 15 | from athenian.api.tracing import sentry_span 16 | 17 | 18 | 
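# The predicates below inspect typing generics through their __origin__ /
# __args__ attributes. For example, is_optional(Optional[int]) is True:
# Optional[int] normalizes to Union[int, None], a two-argument Union whose
# second argument is NoneType.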
def is_generic(klass: type):
    """Determine whether klass is a generic class."""
    return hasattr(klass, "__origin__")


def is_dict(klass: type):
    """Determine whether klass is a Dict."""
    return getattr(klass, "__origin__", None) == dict


def is_list(klass: type):
    """Determine whether klass is a List."""
    return getattr(klass, "__origin__", None) == list


def is_union(klass: type):
    """Determine whether klass is a Union."""
    return getattr(klass, "__origin__", None) == Union


def is_optional(klass: type):
    """Determine whether klass is an Optional."""
    return is_union(klass) and \
        len(klass.__args__) == 2 and issubclass(klass.__args__[1], type(None))


def wraps(wrapper, wrappee):
    """Alternative to functools.wraps() for async functions."""  # noqa: D402
    wrapper.__name__ = wrappee.__name__
    wrapper.__qualname__ = wrappee.__qualname__
    wrapper.__module__ = wrappee.__module__
    wrapper.__doc__ = wrappee.__doc__
    wrapper.__annotations__ = wrappee.__annotations__
    wrapper.__wrapped__ = wrappee
    return wrapper


T = TypeVar("T")


def dataclass(cls: Optional[T] = None,
              /, *,
              slots=False,
              first_mutable: Optional[str] = None,
              **kwargs,
              ) -> Union[T, Type[Mapping[str, Any]]]:
    """
    Generate a dataclasses.dataclass with optional __slots__.

    :param slots: Define __slots__ according to the declared dataclass fields.
    :param first_mutable: First mutable field name. This and all the following fields will be \
                          considered mutable and optional. Such fields are not pickled and can be \
                          changed even though the instance is frozen.
    """
    def wrap(cls):
        cls = dataclasses.dataclass(cls, **kwargs)
        if slots:
            cls = _add_slots_to_dataclass(cls, first_mutable)
        return cls

    # See if we're being called as @dataclass or @dataclass().
    if cls is None:
        # We're called with parens.
        return wrap

    # We're called as @dataclass without parens.
    return wrap(cls)


# Caching context indicator. By default, we don't save the mutable optional fields.
_serialize_mutable_fields_in_dataclasses = ContextVar(
    "serialize_mutable_fields_in_dataclasses", default=False)


@contextmanager
def serialize_mutable_fields_in_dataclasses():
    """Provide a context manager to enable the serialization of mutable optional fields in our \
    dataclasses."""
    # reset() with the token restores the previous value, so nested usage stays correct
    token = _serialize_mutable_fields_in_dataclasses.set(True)
    try:
        yield
    finally:
        _serialize_mutable_fields_in_dataclasses.reset(token)


def _add_slots_to_dataclass(cls: T,
                            first_mutable: Optional[str],
                            ) -> Union[T, Type[Mapping[str, Any]]]:
    """Set __slots__ of a dataclass, and make it a Mapping to compensate for a missing __dict__."""
    # Need to create a new class, since we can't set __slots__ after a class has been created.

    # Make sure __slots__ isn't already set.
    if "__slots__" in cls.__dict__:
        raise TypeError(f"{cls.__name__} already specifies __slots__")

    # Create a new dict for our new class.
    cls_dict = dict(cls.__dict__)
    field_names = tuple(f.name for f in dataclasses.fields(cls))
    cls_dict["__slots__"] = field_names
    for field_name in field_names:
        # Remove our attributes, if present. Their defaults remain recorded in
        # dataclasses.fields().
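        # A field with a default value also exists as a class attribute; leaving it in
        # cls_dict would conflict with the slot descriptor of the same name and make the
        # type() call below raise "ValueError: ... in __slots__ conflicts with class variable".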
        cls_dict.pop(field_name, None)
    # Remove __dict__ itself.
    cls_dict.pop("__dict__", None)
    # __hash__ cannot be inherited from SlotsMapping; the reason is unclear.
    if (hash_method := cls_dict.pop("__hash__", None)) is not None:
        cls_dict["__hash__"] = hash_method
    else:
        def __hash__(self) -> int:
            """Implement hash() over the immutable fields."""
            return hash(tuple(
                (xxhash.xxh64_intdigest(x.view(np.uint8).data) if isinstance(x, np.ndarray) else x)
                for x in self.__getstate__()
            ))

        cls_dict["__hash__"] = __hash__
    qualname = getattr(cls, "__qualname__", None)
    # Record the mutable fields.
    if first_mutable is None:
        first_mutable_index = len(field_names)
    else:
        first_mutable_index = field_names.index(first_mutable)
    mutable_fields = set(field_names[first_mutable_index:])
    if first_mutable is not None:
        def __setattr__(self, attr: str, val: Any) -> None:
            """Alternative to __setattr__ that works with mutable optional fields."""
            assert attr in mutable_fields, "You can only change mutable optional fields."
            object.__setattr__(self, attr, val)

        def make_with_attr(attr):
            def with_attr(self, value) -> cls:
                """Chain __setattr__ to return `self`."""
                setattr(self, attr, value)
                return self

            return with_attr

        cls_dict["__setattr__"] = __setattr__
        for attr in mutable_fields:
            cls_dict["with_" + attr] = make_with_attr(attr)

    class SlotsMapping(Mapping[str, Any]):
        """Satisfy Mapping abstractions by relying on the __slots__."""

        __slots__ = field_names

        def __getitem__(self, item: str) -> Any:
            """Implement []."""
            return getattr(self, item)

        def __len__(self) -> int:
            """Implement len()."""
            return len(self.__slots__)

        def __iter__(self) -> Iterator[str]:
            """Implement iter()."""
            return iter(self.__slots__)

        def __getstate__(self) -> Any:
            """Restore pickling support, which was lost together with __dict__."""
            include_mutable = _serialize_mutable_fields_in_dataclasses.get()
            limit = len(self.__slots__) if include_mutable else first_mutable_index
            return tuple(getattr(self, attr) for attr in self.__slots__[:limit])

        def __setstate__(self, state: Tuple[Any, ...]) -> None:
            """Construct a new class instance from the given `state`."""
            for attr, val in zip(self.__slots__, state):
                object.__setattr__(self, attr, val)
            # Fields with a default value.
            if len(self.__slots__) > len(state):
                for field in dataclasses.fields(self)[len(state):]:
                    object.__setattr__(self, field.name, field.default)

    # And finally create the class.
    cls = type(cls)(cls.__name__, (SlotsMapping, *cls.__bases__), cls_dict)
    if qualname is not None:
        cls.__qualname__ = qualname
    return cls


NST = TypeVar("NST")


class NumpyStruct(Mapping[str, Any]):
    """
    Constrained dataclass based on a numpy structured array.

    We divide the fields into two groups: mutable and immutable.
    The mutable fields are stored as regular class members and discarded from serialization.
    The immutable fields are not materialized explicitly. Instead, they are taken from a numpy
    structured array (`_arr`) that references an arbitrary memory buffer (`_data`).
    Serialization of the class is as simple as exposing the underlying memory buffer outside.

    We support variable-length sub-arrays using the special notation `[]`. That way
    the arrays are appended to `_data`, and `_arr` points to them by pairs (offset, length).
    """

    dtype: np.dtype
    nested_dtypes: Mapping[str, np.dtype]

    def __init__(self, data: Union[bytes, bytearray, memoryview, np.ndarray], **optional: Any):
        """Initialize a new instance of NumpyStruct from raw memory and the (perhaps incomplete) \
        mapping of mutable field values."""
        if isinstance(data, (np.ndarray, np.void)):
            assert data.shape == () or data.shape == (1,)
            data = data.reshape(1)
            self._data = data.view(np.uint8).data
            self._arr = data
        else:
            self._data = data
            self._arr = None
        for attr in self.__slots__[2:]:
            setattr(self, attr, optional.get(attr))

    @classmethod
    def from_fields(cls: Type[NST], **kwargs: Any) -> NST:
        """Initialize a new instance of NumpyStruct from the mapping of immutable field \
        values."""
        arr = np.zeros(1, cls.dtype)
        extra_bytes = []
        offset = cls.dtype.itemsize
        for field_name, (field_dtype, _) in cls.dtype.fields.items():
            value = kwargs.pop(field_name)
            try:
                nested_dtype = cls.nested_dtypes[field_name]
            except KeyError:
                if value is None and field_dtype.char in ("S", "U"):
                    value = ""
                if field_dtype.char == "M" and isinstance(value, datetime):
                    value = value.replace(tzinfo=None)
                arr[field_name] = np.asarray(value, field_dtype)
            else:
                if is_str := ((is_ascii := _dtype_is_ascii(nested_dtype)) or
                              nested_dtype.char in ("S", "U")):
                    if isinstance(value, np.ndarray):
                        if value.dtype == np.dtype(object):
                            nan_mask = value == np.array([None])
                        else:
                            nan_mask = np.full(len(value), False)
                    else:
                        nan_mask = np.fromiter((v is None for v in value),
                                               dtype=np.bool_, count=len(value))
                    if is_ascii:
                        nested_dtype = np.dtype("S")
                value = np.asarray(value, nested_dtype)
                assert len(value.shape) == 1, "we don't support arrays of more than 1 dimension"
                if is_str and nan_mask.any():
                    if not value.flags.writeable:
                        value = value.copy()
                    value[nan_mask] = ""
                extra_bytes.append(data := value.view(np.byte).data)
                pointer = [offset, len(value)]
                if is_str and (is_ascii or nested_dtype.itemsize == 0):
                    pointer.append(
                        value.dtype.itemsize // np.dtype(nested_dtype.char + "1").itemsize)
                arr[field_name] = pointer
                offset += len(data)
        if not extra_bytes:
            # no nested arrays; still forward the remaining (mutable) field values
            return cls(arr.view(np.byte).data, **kwargs)
        return cls(b"".join(chain([arr.view(np.byte).data], extra_bytes)), **kwargs)

    @property
    def data(self) -> bytes:
        """Return the underlying memory."""
        return self._data

    @property
    def array(self) -> np.ndarray:
        """Return the underlying numpy array that wraps `data`."""
        if self._arr is None:
            self._arr = np.frombuffer(self.data, self.dtype, count=1)
        return self._arr

    @property
    def coerced_data(self) -> memoryview:
        """Return the prefix of `data` with the nested immutable objects excluded."""
        return memoryview(self.data)[:self.dtype.itemsize]

    def __getitem__(self, item: str) -> Any:
        """Implement self[]."""
        return getattr(self, item)

    def __setitem__(self, key: str, value: Any) -> None:
        """Implement self[] = ..."""
        setattr(self, key, value)

    def __len__(self) -> int:
        """Implement len()."""
        return len(self.dtype) + len(self.__slots__) - 2

    def __iter__(self) -> Iterator[str]:
        """Implement iter()."""
        return iter(chain(self.dtype.names, self.__slots__[2:]))

    def __hash__(self) -> int:
        """Implement hash()."""
        return hash(self._data)

    def __str__(self) -> str:
        """Format for human-readability."""
        return "{\n\t%s\n}" % ",\n\t".join("%s: %s" % (k, v) for k, v in self.items())

    def __repr__(self) -> str:
        """Implement repr()."""
        kwargs = {k: v for k in self.__slots__[2:] if (v := getattr(self, k)) is not None}
        if kwargs:
            kwargs_str = ", ".join(f"{k}={repr(v)}" for k, v in kwargs.items()) + ", "
        else:
            kwargs_str = ""
        return f"{type(self).__name__}({kwargs_str}data={repr(self._data)})"

    def __eq__(self, other) -> bool:
        """Compare this object to another."""
        if self is other:
            return True

        if self.__class__ is not other.__class__:
            raise NotImplementedError(
                f"Cannot compare {self.__class__} and {other.__class__}")

        return self.data == other.data

    def __getstate__(self) -> Dict[str, Any]:
        """Support pickle.dump()."""
        data = self.data
        return {
            "data": bytes(data) if not isinstance(data, (bytes, bytearray)) else data,
            **{attr: getattr(self, attr) for attr in self.__slots__[2:]},
        }

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """Support pickle.load()."""
        self.__init__(**state)

    def copy(self) -> "NumpyStruct":
        """Clone the instance."""
        return type(self)(self.data, **{attr: getattr(self, attr) for attr in self.__slots__[2:]})

    @staticmethod
    def _generate_get(name: str,
                      type_: Union[str, np.dtype, List[Union[str, np.dtype]]],
                      ) -> Callable[["NumpyStruct"], Any]:
        if _dtype_is_ascii(type_):
            type_ = str
        elif isinstance(type_, list):
            type_ = np.ndarray
        elif (char := np.dtype(type_).char) == "U":
            type_ = np.str_
        elif char == "S":
            type_ = np.bytes_
        elif char == "V":
            type_ = np.ndarray

        def get_field(self) -> Optional[type_]:
            if self._arr is None:
                self._arr = np.frombuffer(self.data, self.dtype, count=1)
            value = self._arr[name][0]
            if (nested_dtype := self.nested_dtypes.get(name)) is None:
                # NaN/NaT is the only kind of value that is not equal to itself
                if value != value:
                    return None
                if type_ is str:
                    value = value.decode()
                if type_ in (str, np.str_):
                    value = value or None
                return value
            if (_dtype_is_ascii(nested_dtype) and (char := "S")) or \
                    ((char := nested_dtype.char) in ("S", "U") and nested_dtype.itemsize == 0):
                offset, count, itemsize = value
                nested_dtype = f"{char}{itemsize}"
            else:
                offset, count = value
            return np.frombuffer(self.data, nested_dtype, offset=offset, count=count)

        get_field.__name__ = name
        return get_field


def _dtype_is_ascii(dtype: Union[str, np.dtype]) -> bool:
    # the builtin function `ascii` doubles as the sentinel for "any-length ASCII string"
    return (dtype is ascii) or (isinstance(dtype, str) and dtype.startswith("ascii"))


def numpy_struct(cls):
    """
    Decorate a class to transform it to a NumpyStruct.

    The decorated class must define a sub-class `Immutable` and may define a sub-class
    `Optional`.
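
    For example (a hypothetical declaration; `dt` uses the variable-length `[]` notation)::

        @numpy_struct
        class Sample:
            class Immutable:
                number: int
                title: "ascii[16]"
                dt: ["datetime64[s]"]

            class Optional:
                note: str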
    `Immutable` annotates the numpy-friendly immutable fields; `Optional` annotates the
    mutable fields.
    """
    dtype = cls.Immutable.__annotations__
    dtype_tuples = []
    nested_dtypes = {}
    for k, v in dtype.items():
        if isinstance(v, list):
            assert len(v) == 1, "Array must be specified as `[dtype]`."
            nested_dtype = v[0]
            if not (is_ascii := _dtype_is_ascii(nested_dtype)):
                nested_dtype = np.dtype(nested_dtype)
            nested_dtypes[k] = nested_dtype
            if is_ascii or (nested_dtype.char in ("S", "U") and nested_dtype.itemsize == 0):
                # variable-length strings also store the character count per element
                dtype_tuples.append((k, np.int32, 3))
            else:
                dtype_tuples.append((k, np.int32, 2))
        elif _dtype_is_ascii(v):
            # "ascii[N]" maps to the fixed-size bytes dtype "SN"
            dtype_tuples.append((k, "S" + v[6:-1]))
        else:
            dtype_tuples.append((k, v))
    try:
        optional = cls.Optional.__annotations__
    except AttributeError:
        optional = {}
    field_names = NamedTuple(
        f"{cls.__name__}FieldNames",
        [(k, str) for k in chain(dtype, optional)],
    )(*chain(dtype, optional))
    base = type(cls.__name__ + "Base", (NumpyStruct,),
                {k: property(NumpyStruct._generate_get(k, v)) for k, v in dtype.items()})
    body = {
        "__slots__": ("_data", "_arr", *optional),
        "dtype": np.dtype(dtype_tuples),
        "nested_dtypes": nested_dtypes,
        "f": field_names,
    }
    struct_cls = type(cls.__name__, (cls, base), body)
    struct_cls.__module__ = cls.__module__
    cls.__name__ += "Origin"
    return struct_cls


@sentry_span
def df_from_structs(items: Iterable[NumpyStruct],
                    length: Optional[int] = None,
                    ) -> pd.DataFrame:
    """
    Combine several NumpyStruct-s into a Pandas DataFrame.

    :param items: A collection, a generator, an iterator - all are accepted.
    :param length: In case `items` does not support `len()`, specify the number of structs \
                   for better performance.
    :return: Pandas DataFrame with columns set to struct fields.
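
    Example (a sketch; `Item` is a hypothetical `@numpy_struct` class with a single
    immutable field `number: int`)::

        df = df_from_structs([Item.from_fields(number=i) for i in range(3)])
        assert list(df["number"]) == [0, 1, 2]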
458 | """ 459 | columns = {} 460 | try: 461 | if length is None: 462 | length = len(items) 463 | except TypeError: 464 | # slower branch without pre-allocation 465 | items_iter = iter(items) 466 | try: 467 | first_item = next(items_iter) 468 | except StopIteration: 469 | return pd.DataFrame() 470 | assert isinstance(first_item, NumpyStruct) 471 | dtype = first_item.dtype 472 | nested_fields = first_item.nested_dtypes 473 | coerced_datas = [first_item.coerced_data] 474 | for k, v in first_item.items(): 475 | if k not in dtype.names or k in nested_fields: 476 | columns[k] = [v] 477 | for item in items_iter: 478 | coerced_datas.append(item.coerced_data) 479 | for k in columns: 480 | columns[k].append(getattr(item, k)) 481 | table_array = np.frombuffer(b"".join(coerced_datas), dtype=dtype) 482 | del coerced_datas 483 | else: 484 | items_iter = iter(items) 485 | try: 486 | first_item = next(items_iter) 487 | except StopIteration: 488 | return pd.DataFrame() 489 | assert isinstance(first_item, NumpyStruct) 490 | dtype = first_item.dtype 491 | nested_fields = first_item.nested_dtypes 492 | itemsize = dtype.itemsize 493 | coerced_datas = bytearray(itemsize * length) 494 | coerced_datas[:itemsize] = first_item.coerced_data 495 | for k, v in first_item.items(): 496 | if k not in dtype.names or k in nested_fields: 497 | columns[k] = column = [None] * length 498 | column[0] = v 499 | for i, item in enumerate(items_iter, 1): 500 | coerced_datas[i * itemsize:(i + 1) * itemsize] = item.coerced_data 501 | for k in columns: 502 | columns[k][i] = item[k] 503 | table_array = np.frombuffer(coerced_datas, dtype=dtype) 504 | del coerced_datas 505 | for field_name in dtype.names: 506 | if field_name not in nested_fields: 507 | columns[field_name] = table_array[field_name] 508 | del table_array 509 | column_types = {} 510 | try: 511 | for k, v in first_item.Optional.__annotations__.items(): 512 | if not isinstance(v, type) or not issubclass(v, (datetime, np.datetime64, float)): 513 | # we can only unbox types that have a "NaN" value 514 | v = object 515 | column_types[k] = v 516 | except AttributeError: 517 | pass # no Optional 518 | for k, v in columns.items(): 519 | column_type = column_types.get(k, object) 520 | if issubclass(column_type, datetime): 521 | v = tslib.array_to_datetime(np.array(v, dtype=object), utc=True, errors="raise")[0] 522 | elif issubclass(column_type, timedelta): 523 | v = np.array(v, dtype="timedelta64[s]") 524 | elif np.dtype(column_type) != np.dtype(object): 525 | v = np.array(v, dtype=column_type) 526 | columns[k] = v 527 | df = pd.DataFrame.from_dict(columns) 528 | sentry_sdk.Hub.current.scope.span.description = str(len(df)) 529 | return df 530 | --------------------------------------------------------------------------------