├── src ├── actordb_driver.app.src ├── actordb_driver_nif.erl └── actordb_driver.erl ├── c_src ├── wbuf.h ├── platform.c ├── lfqueue.h ├── queue.h ├── platform.h ├── wbuf.c ├── queue.c ├── midl.h ├── lfqueue.c ├── midl.c ├── actordb_driver_nif.h ├── noerl.c ├── nullvfs.c ├── lz4.h ├── tool.c └── wal.c ├── README.md ├── Makefile ├── rebar.config └── test └── test.erl /src/actordb_driver.app.src: -------------------------------------------------------------------------------- 1 | {application, actordb_driver, 2 | [ 3 | {description, "actordb sqlite nif interface"}, 4 | {vsn, "0.10.28"}, 5 | {modules, [actordb_driver, actordb_driver_nif]}, 6 | {registered, []}, 7 | {applications, [ 8 | kernel, 9 | stdlib 10 | ]}, 11 | {env, []} 12 | ]}. 13 | -------------------------------------------------------------------------------- /c_src/wbuf.h: -------------------------------------------------------------------------------- 1 | #ifndef _WBUF_H_ 2 | #define _WBUF_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | // #include "erl_nif.h" 8 | 9 | 10 | char* wbuf_init(const int npages); 11 | int wbuf_put(const int npages, char *buf, char *data, int *tries); 12 | char* wbuf_get(const int npages, char *buf, int index); 13 | void wbuf_release(char *buf, int index); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ActorDB interface to SQLite and LMDB. 2 | 3 | c_src/actordb_driver_nif.c - Erlang interface 4 | 5 | c_src/wal.c - interface between SQLite and LMDB. Exposes the same API as the original SQLite WAL implementation. 6 | 7 | c_src/mdb.c - LMDB 8 | 9 | c_src/sqlite3.c - SQLite amalgamation code without wal.c 10 | 11 | c_src/queue.c - old work queue. 12 | 13 | c_src/lfqueue.c - lock-free queue, the one currently in use. 14 | 15 | c_src/wbuf.c - another lock-free experiment (a lock-free write buffer); currently unused. 
16 | 17 | c_src/tool.c - actordb_tool application. 18 | 19 | c_src/nullvfs.c - an SQLite VFS that does nothing because we don't need sqlite files. Everything is in LMDB through the WAL API. 20 | 21 | c_src/lz4.c - LZ4 compression. Every SQLite page stored in LMDB is compressed using LZ4. 22 | 23 | c_src/noerl.c - Erlang-less app that uses our SQLite+LMDB engine. Used for easy profiling and debugging. 24 | 25 | -------------------------------------------------------------------------------- /c_src/platform.c: -------------------------------------------------------------------------------- 1 | #include "platform.h" 2 | 3 | #ifdef _WIN32 4 | 5 | int clock_gettime(int X, struct timespec* tp) 6 | { 7 | FILETIME ft; 8 | uint64_t nanos; 9 | GetSystemTimeAsFileTime(&ft); 10 | nanos = ((((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime) * 100; 11 | tp->tv_sec = nanos / 1000000000ul; 12 | tp->tv_nsec = nanos % 1000000000ul; 13 | return 1; 14 | } 15 | #endif 16 | 17 | #if !defined(__APPLE__) && !defined(_WIN32) 18 | #include 19 | int SEM_TIMEDWAIT(sem_t s, uint32_t milis) 20 | { 21 | struct timespec ts; 22 | struct timespec dts; 23 | struct timespec sts; 24 | int r; 25 | 26 | if (clock_gettime(CLOCK_REALTIME, &ts) == -1) 27 | return -1; 28 | 29 | dts.tv_sec = milis / 1000; 30 | dts.tv_nsec = (milis % 1000) * 1000000; 31 | sts.tv_sec = ts.tv_sec + dts.tv_sec + (dts.tv_nsec + ts.tv_nsec) / 1000000000; 32 | sts.tv_nsec = (dts.tv_nsec + ts.tv_nsec) % 1000000000; 33 | 34 | while ((r = sem_timedwait(&s, &sts)) == -1 && errno == EINTR) 35 | continue; 36 | return r; 37 | } 38 | #endif 39 | -------------------------------------------------------------------------------- /c_src/lfqueue.h: -------------------------------------------------------------------------------- 1 | #ifndef LFQUEUE_H 2 | #define LFQUEUE_H 3 | 4 | #ifndef _TESTAPP_ 5 | #include "erl_nif.h" 6 | #endif 7 | #include "platform.h" 8 | 9 | typedef struct queue_t queue; 10 | typedef struct qitem_t qitem; 11 | 
typedef struct intq intq; 12 | 13 | struct qitem_t 14 | { 15 | _Atomic (struct qitem_t*) next; 16 | void *cmd; 17 | #ifndef _TESTAPP_ 18 | ErlNifEnv *env; 19 | #endif 20 | char blockStart; 21 | // Every pair of producer-consumer has a reuse queue. 22 | // This way we're not constantly doing allocations. 23 | // Home is a queue that is attached to every producer 24 | // (scheduler) thread. 25 | intq *home; 26 | }; 27 | 28 | struct intq 29 | { 30 | _Atomic (qitem*) head; 31 | qitem* tail; 32 | }; 33 | 34 | struct queue_t 35 | { 36 | struct intq q; 37 | SEMAPHORE sem; 38 | size_t length; 39 | }; 40 | 41 | queue *queue_create(void); 42 | void queue_destroy(queue *queue); 43 | 44 | int queue_push(queue *queue, qitem* item); 45 | qitem* queue_pop(queue *queue); 46 | qitem* queue_trypop(queue *queue); 47 | qitem* queue_timepop(queue *queue, uint32_t miliseconds); 48 | 49 | void queue_recycle(qitem* item); 50 | qitem* queue_get_item(void); 51 | void queue_intq_destroy(intq *q); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(uname_S),Darwin) 2 | TOOLCMD = gcc c_src/tool.c c_src/mdb.c c_src/midl.c c_src/lz4.c -D_TESTAPP_=1 -DMDB_MAXKEYSIZE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -o adbtool 3 | else 4 | TOOLCMD = gcc c_src/tool.c c_src/mdb.c c_src/midl.c c_src/lz4.c -D_TESTAPP_=1 -DMDB_MAXKEYSIZE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -lpthread -ldl -o adbtool 5 | endif 6 | 7 | ifeq ($(uname_S),Darwin) 8 | NOERLCMD = gcc c_src/noerl.c c_src/mdb.c c_src/midl.c c_src/lz4.c -D_TESTAPP_=1 -DMDB_MAXKEYSIZE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -DSQLITE_DEFAULT_MEMSTATUS=0 -g -o noerl 9 | else 10 | NOERLCMD = gcc c_src/noerl.c c_src/mdb.c c_src/midl.c c_src/lz4.c -D_TESTAPP_=1 -DMDB_MAXKEYSIZE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 
-DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -DSQLITE_DEFAULT_MEMSTATUS=0 -g -lpthread -ldl -o noerl 11 | endif 12 | 13 | all: 14 | ../../rebar compile 15 | $(TOOLCMD) 16 | 17 | clean: 18 | ../../rebar clean 19 | 20 | eunit: 21 | ../../rebar eunit 22 | 23 | tool: 24 | $(TOOLCMD) 25 | 26 | ne: 27 | $(NOERLCMD) 28 | 29 | lldb: 30 | -rm *.db wal.* 31 | gcc c_src/test.c -g -DSQLITE_DEBUG -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_THREADSAFE=0 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -o t && lldb t 32 | 33 | valgrind: 34 | -rm *.db wal.* 35 | gcc c_src/test.c -g -DSQLITE_DEBUG -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_THREADSAFE=0 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -o t && valgrind --tool=memcheck --track-origins=yes --leak-check=full ./t 36 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {port_env, 2 | [{"win32","CFLAGS", "$CFLAGS -DMDB_MAXKEYSIZE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1 -DSQLITE_DEFAULT_CACHE_SIZE=1000 -DSQLITE_DEFAULT_FOREIGN_KEYS=1 -DSQLITE_THREADSAFE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_OMIT_SHARED_CACHE -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -DSQLITE_DEFAULT_MEMSTATUS=0 -DNDEBUG=1"}, 3 | {"win32","ERL_LDFLAGS","Ws2_32.lib Advapi32.lib /DEBUG"}, 4 | 5 | {"darwin","CFLAGS", "$CFLAGS -g -DMDB_USE_ROBUST=0 -DMDB_MAXKEYSIZE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_RTREE -DSQLITE_OMIT_SHARED_CACHE -DSQLITE_ENABLE_JSON1 -DSQLITE_DEFAULT_CACHE_SIZE=1000 -DSQLITE_DEFAULT_FOREIGN_KEYS=1 -DSQLITE_THREADSAFE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -DSQLITE_DEFAULT_MEMSTATUS=0 -DNDEBUG=1 -finline-functions -fomit-frame-pointer -fno-strict-aliasing -Wmissing-prototypes -Wno-unused-function -O2 -arch x86_64 -std=c11 -mmacosx-version-min=10.7"}, 6 | 7 | {"linux","CFLAGS", "$CFLAGS -g -DMDB_USE_ROBUST=0 -DMDB_MAXKEYSIZE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_RTREE 
-DSQLITE_OMIT_SHARED_CACHE -DSQLITE_ENABLE_JSON1 -DSQLITE_DEFAULT_CACHE_SIZE=1000 -DSQLITE_DEFAULT_FOREIGN_KEYS=1 -DSQLITE_THREADSAFE=0 -DSQLITE_DEFAULT_PAGE_SIZE=4096 -DSQLITE_DEFAULT_WAL_AUTOCHECKPOINT=0 -DSQLITE_DEFAULT_MEMSTATUS=0 -DNDEBUG=1 -finline-functions -fomit-frame-pointer -fno-strict-aliasing -Wmissing-prototypes -Wno-unused-function -O2 -std=gnu99"} 8 | ]}. 9 | 10 | {port_specs, [{"priv/actordb_driver_nif.so", ["c_src/actordb_driver_nif.c", "c_src/platform.c", "c_src/lfqueue.c","c_src/lz4.c", "c_src/mdb.c", "c_src/midl.c"]}]}. 11 | 12 | {plugins, [pc]}. 13 | 14 | {provider_hooks, 15 | [ 16 | {pre, 17 | [ 18 | {compile, {pc, compile}}, 19 | {clean, {pc, clean}} 20 | ] 21 | } 22 | ] 23 | }. 24 | -------------------------------------------------------------------------------- /c_src/queue.h: -------------------------------------------------------------------------------- 1 | // This file is part of Emonk released under the MIT license. 2 | // See the LICENSE file for more information. 
3 | 4 | /* adapted by: Maas-Maarten Zeeman 23 | #define COND_T dispatch_semaphore_t 24 | #define cond_destroy(X) dispatch_release(X) 25 | #define cond_init(X) X = dispatch_semaphore_create(0) 26 | #define cond_signal(X) dispatch_semaphore_signal(X) 27 | #define cond_wait(X,MTX) dispatch_semaphore_wait(X, DISPATCH_TIME_FOREVER) 28 | #else 29 | #include 30 | #define COND_T sem_t 31 | #define cond_destroy(X) sem_destroy(&X) 32 | #define cond_init(X) sem_init(&X, 0, 0) 33 | #define cond_signal(X) sem_post(&X) 34 | #define cond_wait(X,MTX) sem_wait(&X) 35 | #endif 36 | #else 37 | #define COND_T ErlNifCond* 38 | #define cond_destroy enif_cond_destroy 39 | #define cond_init(X) X = enif_cond_create("queue_cond") 40 | #define cond_signal(X) enif_cond_signal(X) 41 | #define cond_wait(C,MTX) enif_cond_wait(C, MTX) 42 | #endif 43 | 44 | typedef struct queue_t queue; 45 | typedef struct qitem_t qitem; 46 | 47 | struct qitem_t 48 | { 49 | qitem* next; 50 | void *cmd; 51 | #ifndef _TESTAPP_ 52 | ErlNifEnv *env; 53 | #endif 54 | char blockStart; 55 | }; 56 | 57 | struct queue_t 58 | { 59 | #ifndef _TESTAPP_ 60 | ErlNifMutex *lock; 61 | COND_T cond; 62 | #endif 63 | qitem *head; 64 | qitem *tail; 65 | qitem *reuseq; 66 | // void (*freeitem)(db_command); 67 | int length; 68 | }; 69 | 70 | queue *queue_create(void); 71 | void queue_destroy(queue *queue); 72 | 73 | // int queue_has_item(queue *queue); 74 | 75 | int queue_push(queue *queue, qitem* item); 76 | qitem* queue_pop(queue *queue); 77 | 78 | void queue_recycle(queue *queue, qitem* item); 79 | 80 | qitem* queue_get_item(queue *queue); 81 | int queue_size(queue *queue); 82 | 83 | // int queue_send(queue *queue, void* item); 84 | // void* queue_receive(queue *); 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /c_src/platform.h: -------------------------------------------------------------------------------- 1 | #ifndef _PLATFORM_H_ 2 | #define _PLATFORM_H_ 3 | 4 | #include 5 
| #include 6 | 7 | #ifndef _WIN32 8 | #include 9 | #endif 10 | 11 | // #define u8 uint8_t 12 | // #define i64 int64_t 13 | // #define u64 uint64_t 14 | // #define u32 uint32_t 15 | // #define i32 int32_t 16 | 17 | // #if ATOMIC 18 | #ifndef _WIN32 19 | #include 20 | #endif 21 | // Unit helpers: scale seconds / milliseconds / microseconds to nanoseconds. The argument is parenthesized so that e.g. MS(a+b) expands correctly. 22 | #define SEC(X) (1000000000*(X)) 23 | #define MS(X) (1000000*(X)) 24 | #define US(X) (1000*(X)) 25 | #define QSIZE 1024 26 | 27 | #if defined(_WIN32) 28 | #include 29 | 30 | typedef struct timespec 31 | { 32 | long tv_sec; 33 | long tv_nsec; 34 | }; 35 | int clock_gettime(int X, struct timespec *tv); 36 | #endif 37 | 38 | #if defined(_WIN32) 39 | #define IOV WSABUF 40 | #define IOV_SET(IOV, DATA, LEN) IOV.buf = DATA; IOV.len = LEN 41 | #define IOV_SEND(RT, FD, IOV, IOVSIZE) WSASend(FD,IOV,IOVSIZE, &RT, 0, NULL, NULL) 42 | #else 43 | #define IOV struct iovec 44 | #define IOV_SET(IOV, DATA, LEN) IOV.iov_base = DATA; IOV.iov_len = LEN 45 | #define IOV_SEND(RT, FD, IOV, IOVSIZE) RT = writev(FD,IOV,IOVSIZE) 46 | #endif 47 | // Apple: SEM_TIMEDWAIT widens T (milliseconds) to 64 bits before scaling to nanoseconds, so timeouts above ~4.2 s do not wrap in 32-bit arithmetic. 48 | #if defined(__APPLE__) 49 | #include 50 | #include 51 | #define MemoryBarrier OSMemoryBarrier 52 | #include 53 | #define SEMAPHORE dispatch_semaphore_t 54 | #define TIME uint64_t 55 | #define SEM_INIT_SET(X) (X = dispatch_semaphore_create(1)) == NULL 56 | #define SEM_INIT(X) (X = dispatch_semaphore_create(0)) == NULL 57 | #define SEM_WAIT(X) dispatch_semaphore_wait(X, DISPATCH_TIME_FOREVER) 58 | #define SEM_TIMEDWAIT(X,T) dispatch_semaphore_wait(X, dispatch_time(DISPATCH_TIME_NOW, (int64_t)MS((int64_t)(T)))) 59 | #define SEM_POST(X) dispatch_semaphore_signal(X) 60 | #define SEM_DESTROY(X) dispatch_release(X) 61 | #define GETTIME(X) X = mach_absolute_time() 62 | #define INITTIME mach_timebase_info_data_t timeinfo; mach_timebase_info(&timeinfo) 63 | #define NANODIFF(STOP,START,DIFF) DIFF = ((STOP-START)*timeinfo.numer)/timeinfo.denom 64 | #elif defined(_WIN32) 65 | #define _Atomic(X) X volatile 66 | #define SEMAPHORE HANDLE 67 | #define TIME struct timespec 68 | #define 
memory_order_relaxed 1 69 | #define sched_yield SwitchToThread 70 | #define atomic_exchange InterlockedExchangePointer 71 | #define atomic_store(X,V) (*(X) = (V)) 72 | #define atomic_init(X,V) (*(X) = (V)) 73 | #define atomic_fetch_add InterlockedAdd64 74 | #define atomic_fetch_sub(X,Y) InterlockedAdd64(X,-(Y)) 75 | #define atomic_fetch_add_explicit(X,Y,Z) InterlockedAdd64(X,Y) /* Y is the operand; Z is the memory order and is ignored */ 76 | #define atomic_fetch_sub_explicit(X,Y,Z) InterlockedAdd64(X,-(Y)) /* NOTE(review): InterlockedAdd64 returns the new value, C11 fetch_* the old one - confirm no caller relies on the result */ 77 | #define atomic_load_explicit(X,Z) (*(X)) 78 | #define SEM_INIT_SET(X) (X = CreateSemaphore(NULL,1,1,NULL)) == NULL 79 | #define SEM_INIT(X) (X = CreateSemaphore(NULL,0,1,NULL)) == NULL 80 | #define SEM_WAIT(X) WaitForSingleObject(X, INFINITE) 81 | #define SEM_TIMEDWAIT(X,T) (WaitForSingleObject(X,T) == WAIT_TIMEOUT) 82 | #define SEM_POST(X) ReleaseSemaphore(X,1,NULL) 83 | #define SEM_DESTROY(X) CloseHandle(X) 84 | #define GETTIME(X) clock_gettime(1,&X) 85 | #define INITTIME 86 | #define NANODIFF(STOP,START,DIFF) \ 87 | DIFF = ((STOP.tv_sec * 1000000000ULL) + STOP.tv_nsec) - \ 88 | ((START.tv_sec * 1000000000ULL) + START.tv_nsec) 89 | #else 90 | // #define _POSIX_C_SOURCE 199309L 91 | #include 92 | #include 93 | #define SEMAPHORE sem_t 94 | #define TIME struct timespec 95 | #define SEM_INIT_SET(X) sem_init(&X, 0, 1) != 0 96 | #define SEM_INIT(X) sem_init(&X, 0, 0) != 0 97 | #define SEM_WAIT(X) sem_wait(&X) 98 | int SEM_TIMEDWAIT(sem_t s, uint32_t time); 99 | #define SEM_POST(X) sem_post(&X) 100 | #define SEM_DESTROY(X) sem_destroy(&X) 101 | #define GETTIME(X) clock_gettime(CLOCK_MONOTONIC, &X) 102 | #define INITTIME 103 | #define NANODIFF(STOP,START,DIFF) \ 104 | DIFF = ((STOP.tv_sec * 1000000000UL) + STOP.tv_nsec) - \ 105 | ((START.tv_sec * 1000000000UL) + START.tv_nsec) 106 | #endif 107 | 108 | #endif -------------------------------------------------------------------------------- /c_src/wbuf.c: -------------------------------------------------------------------------------- 1 | #include "wbuf.h" 2 | #include 3 | #include 4 | #include 
5 | #define PGSZ SQLITE_DEFAULT_PAGE_SIZE 6 | #define ffz(x) ffsll(~(x)) 7 | 8 | #define WBUF_MAP_ELEMENTS(NP) (NP/(sizeof(long long int)*8)) 9 | #define WBUF_MAPBYTES(NP) (WBUF_MAP_ELEMENTS(NP)*sizeof(long long int)) 10 | #define WBUF_SIZE(NP) (PGSZ*NP) 11 | // Allocate a write buffer: an occupancy bitmap (one bit per page) followed by npages pages of PGSZ bytes, all zeroed. Returns NULL if npages is not a positive multiple of 64 or if allocation fails. 12 | char* wbuf_init(const int npages) 13 | { 14 | int i; 15 | char *buf; 16 | atomic_llong *map; 17 | // Each map element tracks 64 pages, so only positive multiples of 64 are accepted. 18 | if (npages <= 0 || npages % 64) 19 | return NULL; 20 | 21 | buf = malloc(WBUF_MAPBYTES(npages)+WBUF_SIZE(npages)); 22 | if (buf == NULL) 23 | return NULL; 24 | map = (atomic_llong*)buf; 25 | memset(buf+WBUF_MAPBYTES(npages), 0, WBUF_SIZE(npages)); 26 | for (i = 0; i < WBUF_MAP_ELEMENTS(npages); i++) 27 | { 28 | atomic_init(&map[i],0); 29 | } 30 | return buf; 31 | } 32 | 33 | int wbuf_put(const int npages, char *buf1, char *data, int *tries) 34 | { 35 | int i,t = 0; 36 | char *buf = buf1+WBUF_MAPBYTES(npages); 37 | atomic_llong *map = (atomic_llong*)buf1; 38 | int zbit; 39 | 40 | while(1) 41 | { 42 | // Reserve space 43 | for (i = 0; i < WBUF_MAP_ELEMENTS(npages); i++) 44 | { 45 | long long int mval = atomic_load(&map[i]); 46 | long long int nval; 47 | 48 | // if no zero bit go to next element 49 | if (!(zbit = ffz(mval))) 50 | continue; 51 | 52 | nval = mval | (((long long int)1) << (--zbit)); 53 | 54 | // update map val with bit set. 55 | // If successful we are done. 56 | if (atomic_compare_exchange_strong(&map[i], &mval, nval)) 57 | break; 58 | // Unable to exchange, go again for same index. 
59 | else 60 | { 61 | i--; 62 | t++; 63 | } 64 | } 65 | 66 | if (i < WBUF_MAP_ELEMENTS(npages)) 67 | { 68 | // Copy data 69 | memcpy(buf + i*64*PGSZ + zbit*PGSZ, data, PGSZ); 70 | break; 71 | } 72 | else 73 | { 74 | usleep(100); 75 | } 76 | } 77 | 78 | if (tries != NULL) 79 | *tries = t; 80 | 81 | return i*64 + zbit; 82 | } 83 | 84 | char* wbuf_get(const int npages, char *buf, int index) 85 | { 86 | return buf+WBUF_MAPBYTES(npages) + index*PGSZ; 87 | } 88 | 89 | void wbuf_release(char *buf1, int index) 90 | { 91 | atomic_llong *map = (atomic_llong*)buf1; 92 | int i = index / 64; 93 | int bit = index % 64; 94 | 95 | while (1) 96 | { 97 | long long int mval = atomic_load(&map[i]); 98 | long long int nval = mval & (~(((long long int)1) << bit)); 99 | if (atomic_compare_exchange_strong(&map[i], &mval, nval)) 100 | break; 101 | } 102 | } 103 | 104 | // 105 | // 106 | // TEST APP 107 | // 108 | // 109 | // gcc c_src/wbuf.c -DTEST_WBUF -DSQLITE_DEFAULT_PAGE_SIZE=4096 -lpthread -O2 -o wb 110 | #ifdef TEST_WBUF 111 | 112 | #define NUM_THREADS 6 113 | #define NPAGES 64 114 | 115 | static void *perform(void *arg) 116 | { 117 | char *wb = (char*)arg; 118 | char bufin[4096]; 119 | char bufout[4096]; 120 | char *op; 121 | int i; 122 | long long int me = (long long int)pthread_self(); 123 | 124 | for (i = 0; i < sizeof(bufin); i++) 125 | bufin[i] = i; 126 | memcpy(bufin, &me, sizeof(long long int)); 127 | memcpy(bufout, bufin, sizeof(bufin)); 128 | 129 | for (i = 0; i < 1000000; i++) 130 | { 131 | int tries = 0; 132 | int index = wbuf_put(NPAGES, wb, bufin, &tries); 133 | op = wbuf_get(NPAGES, wb, index); 134 | memcpy(bufout, op, sizeof(bufout)); 135 | wbuf_release(wb, index); 136 | 137 | if (memcmp(bufin, bufout, sizeof(bufin)) != 0) 138 | { 139 | printf("ERROR! 
%d\n", i); 140 | exit(1); 141 | } 142 | // if (tries > 10) 143 | // printf("%lld, i=%d, tries=%d, index=%d\n",me, i, tries, index); 144 | 145 | // if ((i % 10000) == 0) 146 | // printf("%lld, i=%d, tries=%d, index=%d\n",me, i, tries, index); 147 | } 148 | 149 | return NULL; 150 | } 151 | 152 | 153 | int main(int argc, const char* argv[]) 154 | { 155 | char* w; 156 | int i; 157 | pthread_t threads[NUM_THREADS]; 158 | 159 | // less available space than threads 160 | w = wbuf_init(NPAGES); 161 | for (i = 0; i < NUM_THREADS; i++) 162 | { 163 | pthread_create(&threads[i], NULL, perform, (void *)w); 164 | usleep(100); 165 | } 166 | 167 | perform(w); 168 | for (i = 0; i < NUM_THREADS; i++) 169 | pthread_join(threads[i],NULL); 170 | 171 | printf("No thread sync errors!\n"); 172 | return 0; 173 | } 174 | 175 | #endif -------------------------------------------------------------------------------- /src/actordb_driver_nif.erl: -------------------------------------------------------------------------------- 1 | % This Source Code Form is subject to the terms of the Mozilla Public 2 | % License, v. 2.0. If a copy of the MPL was not distributed with this 3 | % file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | -module(actordb_driver_nif). 
5 | 6 | -export([init/1, 7 | open/5, 8 | open/6, 9 | exec_script/7, 10 | exec_script/8, 11 | exec_read/4, 12 | exec_read/5, 13 | store_prepared_table/2, 14 | close/3, 15 | noop/3, 16 | parse_helper/2, 17 | % interrupt_query/1, 18 | lz4_compress/1, 19 | lz4_decompress/2, 20 | lz4_decompress/3, 21 | replicate_opts/3, 22 | replication_done/1, 23 | page_size/0, 24 | iterate_db/4, 25 | iterate_db/5, 26 | iterate_close/1, 27 | all_tunnel_call/3, 28 | all_tunnel_call/4, 29 | checkpoint_lock/4, 30 | inject_page/5, 31 | wal_rewind/4, 32 | wal_rewind/5, 33 | checkpoint/4, 34 | term_store/3, 35 | term_store/4, 36 | actor_info/4, 37 | fsync_num/1, 38 | fsync/3, 39 | fsync/0, 40 | stmt_info/4, 41 | set_tunnel_connector/0, 42 | set_thread_fd/4, 43 | counter_inc/2 44 | ]). 45 | 46 | counter_inc(_,_) -> 47 | exit(nif_library_not_loaded). 48 | 49 | stmt_info(_,_,_,_) -> 50 | exit(nif_library_not_loaded). 51 | 52 | actor_info(_,_,_,_) -> 53 | exit(nif_library_not_loaded). 54 | 55 | exec_read(_,_,_,_) -> 56 | exit(nif_library_not_loaded). 57 | 58 | exec_read(_,_,_,_,_) -> 59 | exit(nif_library_not_loaded). 60 | 61 | replication_done(_) -> 62 | exit(nif_library_not_loaded). 63 | 64 | fsync() -> 65 | exit(nif_library_not_loaded). 66 | 67 | fsync(_,_,_) -> 68 | exit(nif_library_not_loaded). 69 | 70 | term_store(_,_,_) -> 71 | exit(nif_library_not_loaded). 72 | 73 | term_store(_,_,_,_) -> 74 | exit(nif_library_not_loaded). 75 | 76 | fsync_num(_) -> 77 | exit(nif_library_not_loaded). 78 | 79 | checkpoint(_,_,_,_) -> 80 | exit(nif_library_not_loaded). 81 | 82 | wal_rewind(_,_,_,_) -> 83 | exit(nif_library_not_loaded). 84 | 85 | wal_rewind(_,_,_,_,_) -> 86 | exit(nif_library_not_loaded). 87 | 88 | page_size() -> 89 | exit(nif_library_not_loaded). 90 | 91 | iterate_db(_,_,_,_) -> 92 | exit(nif_library_not_loaded). 93 | iterate_db(_,_,_,_,_) -> 94 | exit(nif_library_not_loaded). 95 | 96 | iterate_close(_) -> 97 | exit(nif_library_not_loaded). 
98 | 99 | checkpoint_lock(_,_,_,_) -> 100 | exit(nif_library_not_loaded). 101 | 102 | inject_page(_,_,_,_,_) -> 103 | exit(nif_library_not_loaded). 104 | 105 | store_prepared_table(_,_) -> 106 | exit(nif_library_not_loaded). 107 | 108 | all_tunnel_call(_,_,_) -> 109 | exit(nif_library_not_loaded). 110 | 111 | all_tunnel_call(_,_,_,_) -> 112 | exit(nif_library_not_loaded). 113 | 114 | noop(_,_,_) -> 115 | exit(nif_library_not_loaded). 116 | 117 | replicate_opts(_,_,_) -> 118 | exit(nif_library_not_loaded). 119 | 120 | % interrupt_query(_) -> 121 | % exit(nif_library_not_loaded). 122 | 123 | parse_helper(_,_) -> 124 | exit(nif_library_not_loaded). 125 | 126 | lz4_compress(_) -> 127 | exit(nif_library_not_loaded). 128 | 129 | lz4_decompress(_,_) -> 130 | exit(nif_library_not_loaded). 131 | 132 | lz4_decompress(_,_,_) -> 133 | exit(nif_library_not_loaded). 134 | 135 | set_tunnel_connector() -> 136 | exit(nif_library_not_loaded). 137 | 138 | set_thread_fd(_,_,_,_) -> 139 | exit(nif_library_not_loaded). 140 | 141 | open(_Ref, _Dest, _Filename,_ThreadNumber,_Mode) -> 142 | exit(nif_library_not_loaded). 143 | open(_Ref, _Dest, _Filename,_ThreadNumber,_Sql,_Mode) -> 144 | exit(nif_library_not_loaded). 145 | 146 | exec_script(_Db, _Ref, _Dest, _Sql,_Term,_Index,_AParam) -> 147 | exit(nif_library_not_loaded). 148 | 149 | exec_script(_Db, _Ref, _Dest, _Sql,_Term,_Index,_AParam,_RecordInsert) -> 150 | exit(nif_library_not_loaded). 151 | 152 | close(_Db, _Ref, _Dest) -> 153 | exit(nif_library_not_loaded). 
154 | 155 | init(Info) -> 156 | NifName = "actordb_driver_nif", 157 | NifFileName = case code:priv_dir(actordb_driver) of 158 | {error, bad_name} -> filename:join("priv", NifName); 159 | Dir -> filename:join(Dir, NifName) 160 | end, 161 | S = integer_to_list(calendar:datetime_to_gregorian_seconds(erlang:localtime())), 162 | case erlang:load_nif(NifFileName, Info#{logname => "drv_"++hd(string:tokens(atom_to_list(node()),"@"))++"_"++S++".txt"}) of 163 | ok -> 164 | ok; 165 | {error,{upgrade,_}} -> 166 | ok; 167 | {error,{reload,_}} -> 168 | ok 169 | end. 170 | -------------------------------------------------------------------------------- /c_src/queue.c: -------------------------------------------------------------------------------- 1 | // This file is part of Emonk released under the MIT license. 2 | // See the LICENSE file for more information. 3 | 4 | /* Adapted by: Maas-Maarten Zeeman 8 | #include 9 | #include 10 | #include "queue.h" 11 | #define BLOCK_SIZE 128 12 | 13 | 14 | // void* 15 | // queue_get_item_data(void* item) 16 | // { 17 | // qitem* it = (qitem*)item; 18 | // return it->data; 19 | // } 20 | // 21 | // void 22 | // queue_set_item_data(void* item, void *data) 23 | // { 24 | // qitem* it = (qitem*)item; 25 | // it->data = data; 26 | // } 27 | 28 | #ifndef _TESTAPP_ 29 | #if USE_SEM 30 | static void qwait(queue *q) 31 | { 32 | while (q->head == NULL) 33 | { 34 | enif_mutex_unlock(q->lock); 35 | cond_wait(q->cond, q->lock); 36 | while (enif_mutex_trylock(q->lock) != 0) 37 | { 38 | } 39 | } 40 | } 41 | #else 42 | static void qwait(queue *q) 43 | { 44 | while (q->head == NULL) 45 | cond_wait(q->cond, q->lock); 46 | } 47 | #endif 48 | #endif 49 | 50 | queue *queue_create() 51 | { 52 | queue *ret; 53 | // int i = 0; 54 | // qitem *item; 55 | 56 | ret = (queue *) enif_alloc(sizeof(struct queue_t)); 57 | if(ret == NULL) goto error; 58 | memset(ret, 0, sizeof(struct queue_t)); 59 | 60 | ret->lock = enif_mutex_create("queue_lock"); 61 | if(ret->lock == NULL) 
goto error; 62 | 63 | cond_init(ret->cond); 64 | 65 | return ret; 66 | 67 | error: 68 | if(ret) 69 | { 70 | enif_mutex_destroy(ret->lock); 71 | cond_destroy(ret->cond); 72 | enif_free(ret); 73 | } 74 | return NULL; 75 | } 76 | 77 | void 78 | queue_destroy(queue *queue) 79 | { 80 | ErlNifMutex *lock; 81 | COND_T cond; 82 | qitem *blocks = NULL; 83 | 84 | enif_mutex_lock(queue->lock); 85 | lock = queue->lock; 86 | cond = queue->cond; 87 | 88 | while(queue->reuseq != NULL) 89 | { 90 | qitem *tmp = queue->reuseq->next; 91 | if(tmp != NULL && tmp->env != NULL) 92 | enif_free_env(tmp->env); 93 | if (tmp != NULL && tmp->cmd != NULL) 94 | enif_free(tmp->cmd); 95 | if (queue->reuseq->blockStart) 96 | { 97 | queue->reuseq->next = blocks; 98 | blocks = queue->reuseq; 99 | } 100 | queue->reuseq = tmp; 101 | } 102 | while (blocks != NULL) 103 | { 104 | qitem *tmp = blocks->next; 105 | enif_free(blocks); 106 | blocks = tmp; 107 | } 108 | enif_mutex_unlock(lock); 109 | 110 | cond_destroy(cond); 111 | enif_mutex_destroy(lock); 112 | enif_free(queue); 113 | } 114 | 115 | 116 | int 117 | queue_push(queue *queue, qitem *entry) 118 | { 119 | while (enif_mutex_trylock(queue->lock) != 0) 120 | { 121 | } 122 | 123 | assert(queue->length >= 0 && "Invalid queue size at push"); 124 | 125 | if(queue->tail != NULL) 126 | queue->tail->next = entry; 127 | 128 | queue->tail = entry; 129 | 130 | if(queue->head == NULL) 131 | queue->head = queue->tail; 132 | 133 | queue->length += 1; 134 | cond_signal(queue->cond); 135 | enif_mutex_unlock(queue->lock); 136 | 137 | return 1; 138 | } 139 | 140 | int queue_size(queue *queue) 141 | { 142 | int r = 0; 143 | while (enif_mutex_trylock(queue->lock) != 0) 144 | { 145 | } 146 | r = queue->length; 147 | enif_mutex_unlock(queue->lock); 148 | return r; 149 | } 150 | 151 | qitem* 152 | queue_pop(queue *queue) 153 | { 154 | qitem *entry; 155 | 156 | while (enif_mutex_trylock(queue->lock) != 0) 157 | { 158 | } 159 | qwait(queue); 160 | 161 | assert(queue->length 
>= 0 && "Invalid queue size at pop."); 162 | 163 | /* Woke up because queue->head != NULL 164 | * Remove the entry and return the payload. 165 | */ 166 | entry = queue->head; 167 | queue->head = entry->next; 168 | entry->next = NULL; 169 | 170 | if(queue->head == NULL) { 171 | assert(queue->tail == entry && "Invalid queue state: Bad tail."); 172 | queue->tail = NULL; 173 | } 174 | 175 | queue->length--; 176 | 177 | enif_mutex_unlock(queue->lock); 178 | 179 | return entry; 180 | } 181 | 182 | void 183 | queue_recycle(queue *queue,qitem *entry) 184 | { 185 | while (enif_mutex_trylock(queue->lock) != 0) 186 | { 187 | } 188 | entry->next = queue->reuseq; 189 | queue->reuseq = entry; 190 | enif_mutex_unlock(queue->lock); 191 | } 192 | 193 | qitem* 194 | queue_get_item(queue *queue) 195 | { 196 | qitem *entry = NULL; 197 | int i; 198 | while (enif_mutex_trylock(queue->lock) != 0) 199 | { 200 | } 201 | if (queue->reuseq != NULL) 202 | { 203 | entry = queue->reuseq; 204 | queue->reuseq = queue->reuseq->next; 205 | entry->next = NULL; 206 | } 207 | else 208 | { 209 | entry = enif_alloc(sizeof(qitem)*BLOCK_SIZE); 210 | memset(entry,0,sizeof(qitem)*BLOCK_SIZE); 211 | 212 | for (i = 1; i < BLOCK_SIZE; i++) 213 | { 214 | entry[i].env = enif_alloc_env(); 215 | entry[i].next = queue->reuseq; 216 | queue->reuseq = &entry[i]; 217 | } 218 | entry->env = enif_alloc_env(); 219 | entry->blockStart = 1; 220 | } 221 | enif_mutex_unlock(queue->lock); 222 | return entry; 223 | } 224 | 225 | -------------------------------------------------------------------------------- /c_src/midl.h: -------------------------------------------------------------------------------- 1 | /** @file midl.h 2 | * @brief LMDB ID List header file. 3 | * 4 | * This file was originally part of back-bdb but has been 5 | * modified for use in libmdb. Most of the macros defined 6 | * in this file are unused, just left over from the original. 
7 | * 8 | * This file is only used internally in libmdb and its definitions 9 | * are not exposed publicly. 10 | */ 11 | /* $OpenLDAP$ */ 12 | /* This work is part of OpenLDAP Software . 13 | * 14 | * Copyright 2000-2015 The OpenLDAP Foundation. 15 | * All rights reserved. 16 | * 17 | * Redistribution and use in source and binary forms, with or without 18 | * modification, are permitted only as authorized by the OpenLDAP 19 | * Public License. 20 | * 21 | * A copy of this license is available in the file LICENSE in the 22 | * top-level directory of the distribution or, alternatively, at 23 | * . 24 | */ 25 | 26 | #ifndef _MDB_MIDL_H_ 27 | #define _MDB_MIDL_H_ 28 | 29 | #include 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | /** @defgroup internal LMDB Internals 36 | * @{ 37 | */ 38 | 39 | /** @defgroup idls ID List Management 40 | * @{ 41 | */ 42 | /** A generic unsigned ID number. These were entryIDs in back-bdb. 43 | * Preferably it should have the same size as a pointer. 44 | */ 45 | typedef size_t MDB_ID; 46 | 47 | /** An IDL is an ID List, a sorted array of IDs. The first 48 | * element of the array is a counter for how many actual 49 | * IDs are in the list. In the original back-bdb code, IDLs are 50 | * sorted in ascending order. For libmdb IDLs are sorted in 51 | * descending order. 
52 | */ 53 | typedef MDB_ID *MDB_IDL; 54 | 55 | /* IDL sizes - likely should be even bigger 56 | * limiting factors: sizeof(ID), thread stack size 57 | */ 58 | #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ 59 | #define MDB_IDL_DB_SIZE (1< 2 | #include 3 | #include "lfqueue.h" 4 | #ifndef _WIN32 5 | #include 6 | #endif 7 | 8 | 9 | #ifdef _WIN32 10 | #define __thread __declspec( thread ) 11 | #endif 12 | 13 | static __thread intq *tls_reuseq = NULL; 14 | static __thread uint64_t tls_qsize = 0; 15 | 16 | // MPSC lock free queue based on 17 | // http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue 18 | static void initq(intq *q) 19 | { 20 | qitem *stub = calloc(1,sizeof(qitem)); 21 | stub->blockStart = 1; 22 | stub->home = q; 23 | 24 | atomic_store(&q->head, stub); 25 | q->tail = stub; 26 | } 27 | // Actual queue push. 28 | static void qpush(intq* self, qitem* n) 29 | { 30 | qitem* prev; 31 | atomic_store(&n->next, 0); 32 | prev = atomic_exchange(&self->head, n); 33 | atomic_store(&prev->next, n); 34 | } 35 | // Actual queue pop. 36 | static qitem* qpop(intq* self) 37 | { 38 | qitem* tail = self->tail; 39 | qitem* next = tail->next; 40 | if (next) 41 | { 42 | self->tail = next; 43 | tail->cmd = next->cmd; 44 | tail->home = next->home; 45 | #ifndef _TESTAPP_ 46 | tail->env = next->env; 47 | #endif 48 | return tail; 49 | } 50 | return NULL; 51 | } 52 | 53 | // External API. For a task queue. 54 | // Actually uses multiple internal queues. 55 | // The main queue is X producers to 1 consumer. Scheduler threads to a worker. 56 | // Recycle queues are between all worker threads and a scheduler. 57 | // All based on initq/qpush/qpop. 58 | // Fixed size and does no allocations after first calls. 
59 | queue *queue_create() 60 | { 61 | queue *ret; 62 | // Allocate a zeroed queue, create its semaphore and install the stub node. Returns NULL on failure. 63 | ret = (queue *) calloc(1,sizeof(struct queue_t)); 64 | if(ret == NULL) 65 | return NULL; 66 | 67 | if (SEM_INIT(ret->sem)) 68 | { 69 | free(ret); // do not leak the queue when the semaphore cannot be created 70 | return NULL; 71 | } 72 | initq(&ret->q); 73 | 74 | return ret; 75 | } 76 | 77 | void queue_destroy(queue *queue) 78 | { 79 | SEM_DESTROY(queue->sem); 80 | free(queue); 81 | } 82 | 83 | // Push item from scheduler thread to worker thread. 84 | int queue_push(queue *queue, qitem *entry) 85 | { 86 | qpush(&queue->q, entry); 87 | SEM_POST(queue->sem); 88 | return 1; 89 | } 90 | 91 | // Return item if available, otherwise NULL. 92 | qitem* queue_trypop(queue *queue) 93 | { 94 | return qpop(&queue->q); 95 | } 96 | 97 | // Get item or wait max time. 98 | qitem* queue_timepop(queue *queue, uint32_t miliseconds) 99 | { 100 | qitem *r = qpop(&queue->q); 101 | if (r) 102 | return r; 103 | else 104 | { 105 | if (SEM_TIMEDWAIT(queue->sem, miliseconds) != 0) 106 | return NULL; 107 | else 108 | return qpop(&queue->q); 109 | } 110 | } 111 | 112 | // Called on worker thread to get an item. Will wait if no items are available. 113 | // Does busy wait for 2ms. 114 | qitem* queue_pop(queue *queue) 115 | { 116 | qitem *r = qpop(&queue->q); 117 | if (r) 118 | return r; 119 | else 120 | { 121 | TIME start; 122 | GETTIME(start); 123 | INITTIME; 124 | while (1) 125 | { 126 | uint64_t diff; 127 | TIME stop; 128 | sched_yield(); 129 | GETTIME(stop); 130 | NANODIFF(stop, start, diff); 131 | 132 | r = qpop(&queue->q); 133 | if (r) 134 | return r; 135 | if (diff > 2000000) // 2ms max busy wait 136 | { 137 | SEM_WAIT(queue->sem); 138 | } 139 | } 140 | } 141 | } 142 | 143 | // Push entry back to home queue. 144 | // Called from worker thread to give an entry back to a scheduler thread. 145 | void queue_recycle(qitem *entry) 146 | { 147 | qpush(entry->home, entry); 148 | } 149 | 150 | // Called only once per thread. 
151 | static void populate(intq *q) 152 | { 153 | int i; 154 | qitem *entry = calloc(QSIZE,sizeof(qitem)); 155 | 156 | #ifndef _TESTAPP_ 157 | entry[0].env = enif_alloc_env(); 158 | #endif 159 | entry[0].blockStart = 1; 160 | entry[0].home = q; 161 | qpush(q, &entry[0]); 162 | for (i = 1; i < QSIZE; i++) 163 | { 164 | #ifndef _TESTAPP_ 165 | entry[i].env = enif_alloc_env(); 166 | #endif 167 | entry[i].home = q; 168 | qpush(q, &entry[i]); 169 | } 170 | tls_qsize += QSIZE; 171 | } 172 | 173 | // scheduler thread is the single consumer of tls_reuseq 174 | // producers are worker threads or scheduler thread itself. 175 | // We have a fixed number of events that are populated on first call. 176 | // If return NULL, caller should busy wait, go do something else or sleep. 177 | qitem* queue_get_item(void) 178 | { 179 | // qitem *res; 180 | if (tls_reuseq == NULL) 181 | { 182 | tls_reuseq = calloc(1,sizeof(intq)); 183 | initq(tls_reuseq); 184 | populate(tls_reuseq); 185 | } 186 | return qpop(tls_reuseq); 187 | // if (!res) 188 | // { 189 | // populate(tls_reuseq); 190 | // return qpop(tls_reuseq); 191 | // } 192 | // return res; 193 | } 194 | 195 | void queue_intq_destroy(intq *q) 196 | { 197 | qitem *it; 198 | if (q == NULL) 199 | return; 200 | while ((it = qpop(q))) 201 | { 202 | #ifndef _TESTAPP_ 203 | if (it->env) 204 | enif_free_env(it->env); 205 | #endif 206 | if (it->blockStart) 207 | free(it); 208 | } 209 | if (q->head) 210 | { 211 | if (q->head->blockStart) 212 | free(q->head); 213 | } 214 | free(q); 215 | } 216 | 217 | 218 | // 219 | // 220 | // TEST APP 221 | // 222 | // 223 | // gcc c_src/lfqueue.c -DTEST_LQUEUE -D_TESTAPP_ -DSQLITE_DEFAULT_PAGE_SIZE=4096 -lpthread -o lfq 224 | #ifdef TEST_LQUEUE 225 | typedef struct item 226 | { 227 | int thread; 228 | uint64_t n; 229 | uint32_t recycled; 230 | }item; 231 | 232 | typedef struct threadinf 233 | { 234 | int thread; 235 | queue *q; 236 | }threadinf; 237 | 238 | #define ITERATIONS 1000000 239 | #define NUM_THREADS 
20 240 | 241 | static void *producer(void *arg) 242 | { 243 | threadinf *inf = (threadinf*)arg; 244 | char *op; 245 | uint64_t i; 246 | long long int me = (long long int)pthread_self(); 247 | uint64_t val = 1; 248 | 249 | while (val < ITERATIONS) 250 | { 251 | // printf("PULL! %lld\n",me); 252 | item *it; 253 | qitem *qi = queue_get_item(inf->q); 254 | if (!qi) 255 | continue; 256 | if (qi->cmd == NULL) 257 | qi->cmd = calloc(1,sizeof(item)); 258 | if (qi->home != tls_reuseq) 259 | { 260 | printf("Item returned to wrong home!\n"); 261 | exit(1); 262 | } 263 | it = (item*)qi->cmd; 264 | // if (it->recycled) 265 | // printf("RECYCLED! %u %d\n",it->recycled, inf->thread); 266 | it->n = val++; 267 | it->thread = inf->thread; 268 | queue_push(inf->q,qi); 269 | // if (tries > 10) 270 | // printf("%lld, i=%d, tries=%d, index=%d\n",me, i, tries, index); 271 | 272 | // if ((i % 10000) == 0) 273 | // printf("pthr=%lld, i=%d\n",me, i); 274 | } 275 | printf("Thread done %llu\n", tls_qsize); 276 | 277 | return NULL; 278 | } 279 | 280 | 281 | int main(int argc, const char* argv[]) 282 | { 283 | queue* q; 284 | int i; 285 | pthread_t threads[NUM_THREADS]; 286 | threadinf infos[NUM_THREADS]; 287 | uint64_t thrnums[NUM_THREADS]; 288 | 289 | q = queue_create(); 290 | for (i = 0; i < NUM_THREADS; i++) 291 | { 292 | thrnums[i] = 1; 293 | infos[i].thread = i; 294 | infos[i].q = q; 295 | pthread_create(&threads[i], NULL, producer, (void *)&infos[i]); 296 | } 297 | 298 | // for (i = 0; i < ITERATIONS*NUM_THREADS; i++) 299 | i = 0; 300 | TIME start; 301 | GETTIME(start); 302 | while (i < NUM_THREADS) 303 | { 304 | qitem *qi = queue_pop(q); 305 | item *it = (item*)qi->cmd; 306 | if (thrnums[it->thread] != it->n) 307 | { 308 | printf("Items not sequential thread=%d, n=%llu, shouldbe=%llu, recycled=%u!!\n", 309 | it->thread, it->n, thrnums[it->thread], it->recycled); 310 | return 0; 311 | } 312 | thrnums[it->thread]++; 313 | if (thrnums[it->thread] == ITERATIONS) 314 | i++; 315 | // 
printf("Recycle thr=%d val=%llu, recycled=%u\n",it->thread,it->n, it->recycled++); 316 | queue_recycle(q,qi); 317 | } 318 | uint64_t diff; 319 | TIME stop; 320 | GETTIME(stop); 321 | NANODIFF(stop, start, diff); 322 | printf("Done in: %llums\n",diff / 1000000); 323 | 324 | // for (i = 0; i < NUM_THREADS; i++) 325 | // printf("threadpos =%llu\n",thrnums[i]); 326 | // pthread_join(threads[i],NULL); 327 | 328 | // printf("No thread sync errors!\n"); 329 | return 0; 330 | } 331 | 332 | #endif 333 | -------------------------------------------------------------------------------- /c_src/midl.c: -------------------------------------------------------------------------------- 1 | /** @file midl.c 2 | * @brief ldap bdb back-end ID List functions */ 3 | /* $OpenLDAP$ */ 4 | /* This work is part of OpenLDAP Software . 5 | * 6 | * Copyright 2000-2015 The OpenLDAP Foundation. 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted only as authorized by the OpenLDAP 11 | * Public License. 12 | * 13 | * A copy of this license is available in the file LICENSE in the 14 | * top-level directory of the distribution or, alternatively, at 15 | * . 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "midl.h" 24 | 25 | /** @defgroup internal LMDB Internals 26 | * @{ 27 | */ 28 | /** @defgroup idls ID List Management 29 | * @{ 30 | */ 31 | #define CMP(x,y) ( (x) < (y) ? 
-1 : (x) > (y) ) 32 | 33 | unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ) 34 | { 35 | /* 36 | * binary search of id in ids 37 | * if found, returns position of id 38 | * if not found, returns first position greater than id 39 | */ 40 | unsigned base = 0; 41 | unsigned cursor = 1; 42 | int val = 0; 43 | unsigned n = ids[0]; 44 | 45 | while( 0 < n ) { 46 | unsigned pivot = n >> 1; 47 | cursor = base + pivot + 1; 48 | val = CMP( ids[cursor], id ); 49 | 50 | if( val < 0 ) { 51 | n = pivot; 52 | 53 | } else if ( val > 0 ) { 54 | base = cursor; 55 | n -= pivot + 1; 56 | 57 | } else { 58 | return cursor; 59 | } 60 | } 61 | 62 | if( val > 0 ) { 63 | ++cursor; 64 | } 65 | return cursor; 66 | } 67 | 68 | #if 0 /* superseded by append/sort */ 69 | int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) 70 | { 71 | unsigned x, i; 72 | 73 | x = mdb_midl_search( ids, id ); 74 | assert( x > 0 ); 75 | 76 | if( x < 1 ) { 77 | /* internal error */ 78 | return -2; 79 | } 80 | 81 | if ( x <= ids[0] && ids[x] == id ) { 82 | /* duplicate */ 83 | assert(0); 84 | return -1; 85 | } 86 | 87 | if ( ++ids[0] >= MDB_IDL_DB_MAX ) { 88 | /* no room */ 89 | --ids[0]; 90 | return -2; 91 | 92 | } else { 93 | /* insert id */ 94 | for (i=ids[0]; i>x; i--) 95 | ids[i] = ids[i-1]; 96 | ids[x] = id; 97 | } 98 | 99 | return 0; 100 | } 101 | #endif 102 | 103 | MDB_IDL mdb_midl_alloc(int num) 104 | { 105 | MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID)); 106 | if (ids) { 107 | *ids++ = num; 108 | *ids = 0; 109 | } 110 | return ids; 111 | } 112 | 113 | void mdb_midl_free(MDB_IDL ids) 114 | { 115 | if (ids) 116 | free(ids-1); 117 | } 118 | 119 | void mdb_midl_shrink( MDB_IDL *idp ) 120 | { 121 | MDB_IDL ids = *idp; 122 | if (*(--ids) > MDB_IDL_UM_MAX && 123 | (ids = realloc(ids, (MDB_IDL_UM_MAX+2) * sizeof(MDB_ID)))) 124 | { 125 | *ids++ = MDB_IDL_UM_MAX; 126 | *idp = ids; 127 | } 128 | } 129 | 130 | static int mdb_midl_grow( MDB_IDL *idp, int num ) 131 | { 132 | MDB_IDL idn = *idp-1; 133 | /* grow it */ 134 | idn 
= realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); 135 | if (!idn) 136 | return ENOMEM; 137 | *idn++ += num; 138 | *idp = idn; 139 | return 0; 140 | } 141 | 142 | int mdb_midl_need( MDB_IDL *idp, unsigned num ) 143 | { 144 | MDB_IDL ids = *idp; 145 | num += ids[0]; 146 | if (num > ids[-1]) { 147 | num = (num + num/4 + (256 + 2)) & -256; 148 | if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) 149 | return ENOMEM; 150 | *ids++ = num - 2; 151 | *idp = ids; 152 | } 153 | return 0; 154 | } 155 | 156 | int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) 157 | { 158 | MDB_IDL ids = *idp; 159 | /* Too big? */ 160 | if (ids[0] >= ids[-1]) { 161 | if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) 162 | return ENOMEM; 163 | ids = *idp; 164 | } 165 | ids[0]++; 166 | ids[ids[0]] = id; 167 | return 0; 168 | } 169 | 170 | int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) 171 | { 172 | MDB_IDL ids = *idp; 173 | /* Too big? */ 174 | if (ids[0] + app[0] >= ids[-1]) { 175 | if (mdb_midl_grow(idp, app[0])) 176 | return ENOMEM; 177 | ids = *idp; 178 | } 179 | memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID)); 180 | ids[0] += app[0]; 181 | return 0; 182 | } 183 | 184 | int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) 185 | { 186 | MDB_ID *ids = *idp, len = ids[0]; 187 | /* Too big? 
*/ 188 | if (len + n > ids[-1]) { 189 | if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) 190 | return ENOMEM; 191 | ids = *idp; 192 | } 193 | ids[0] = len + n; 194 | ids += len; 195 | while (n) 196 | ids[n--] = id++; 197 | return 0; 198 | } 199 | 200 | void mdb_midl_xmerge( MDB_IDL idl, MDB_IDL merge ) 201 | { 202 | MDB_ID old_id, merge_id, i = merge[0], j = idl[0], k = i+j, total = k; 203 | idl[0] = (MDB_ID)-1; /* delimiter for idl scan below */ 204 | old_id = idl[j]; 205 | while (i) { 206 | merge_id = merge[i--]; 207 | for (; old_id < merge_id; old_id = idl[--j]) 208 | idl[k--] = old_id; 209 | idl[k--] = merge_id; 210 | } 211 | idl[0] = total; 212 | } 213 | 214 | /* Quicksort + Insertion sort for small arrays */ 215 | 216 | #define SMALL 8 217 | #define MIDL_SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } 218 | 219 | void 220 | mdb_midl_sort( MDB_IDL ids ) 221 | { 222 | /* Max possible depth of int-indexed tree * 2 items/level */ 223 | int istack[sizeof(int)*CHAR_BIT * 2]; 224 | int i,j,k,l,ir,jstack; 225 | MDB_ID a, itmp; 226 | 227 | ir = (int)ids[0]; 228 | l = 1; 229 | jstack = 0; 230 | for(;;) { 231 | if (ir - l < SMALL) { /* Insertion sort */ 232 | for (j=l+1;j<=ir;j++) { 233 | a = ids[j]; 234 | for (i=j-1;i>=1;i--) { 235 | if (ids[i] >= a) break; 236 | ids[i+1] = ids[i]; 237 | } 238 | ids[i+1] = a; 239 | } 240 | if (jstack == 0) break; 241 | ir = istack[jstack--]; 242 | l = istack[jstack--]; 243 | } else { 244 | k = (l + ir) >> 1; /* Choose median of left, center, right */ 245 | MIDL_SWAP(ids[k], ids[l+1]); 246 | if (ids[l] < ids[ir]) { 247 | MIDL_SWAP(ids[l], ids[ir]); 248 | } 249 | if (ids[l+1] < ids[ir]) { 250 | MIDL_SWAP(ids[l+1], ids[ir]); 251 | } 252 | if (ids[l] < ids[l+1]) { 253 | MIDL_SWAP(ids[l], ids[l+1]); 254 | } 255 | i = l+1; 256 | j = ir; 257 | a = ids[l+1]; 258 | for(;;) { 259 | do i++; while(ids[i] > a); 260 | do j--; while(ids[j] < a); 261 | if (j < i) break; 262 | MIDL_SWAP(ids[i],ids[j]); 263 | } 264 | ids[l+1] = ids[j]; 265 | ids[j] = a; 266 | 
jstack += 2; 267 | if (ir-i+1 >= j-l) { 268 | istack[jstack] = ir; 269 | istack[jstack-1] = i; 270 | ir = j-1; 271 | } else { 272 | istack[jstack] = j-1; 273 | istack[jstack-1] = l; 274 | l = i; 275 | } 276 | } 277 | } 278 | } 279 | 280 | unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ) 281 | { 282 | /* 283 | * binary search of id in ids 284 | * if found, returns position of id 285 | * if not found, returns first position greater than id 286 | */ 287 | unsigned base = 0; 288 | unsigned cursor = 1; 289 | int val = 0; 290 | unsigned n = (unsigned)ids[0].mid; 291 | 292 | while( 0 < n ) { 293 | unsigned pivot = n >> 1; 294 | cursor = base + pivot + 1; 295 | val = CMP( id, ids[cursor].mid ); 296 | 297 | if( val < 0 ) { 298 | n = pivot; 299 | 300 | } else if ( val > 0 ) { 301 | base = cursor; 302 | n -= pivot + 1; 303 | 304 | } else { 305 | return cursor; 306 | } 307 | } 308 | 309 | if( val > 0 ) { 310 | ++cursor; 311 | } 312 | return cursor; 313 | } 314 | 315 | int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ) 316 | { 317 | unsigned x, i; 318 | 319 | x = mdb_mid2l_search( ids, id->mid ); 320 | 321 | if( x < 1 ) { 322 | /* internal error */ 323 | return -2; 324 | } 325 | 326 | if ( x <= ids[0].mid && ids[x].mid == id->mid ) { 327 | /* duplicate */ 328 | return -1; 329 | } 330 | 331 | if ( ids[0].mid >= MDB_IDL_UM_MAX ) { 332 | /* too big */ 333 | return -2; 334 | 335 | } else { 336 | /* insert id */ 337 | ids[0].mid++; 338 | for (i=(unsigned)ids[0].mid; i>x; i--) 339 | ids[i] = ids[i-1]; 340 | ids[x] = *id; 341 | } 342 | 343 | return 0; 344 | } 345 | 346 | int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) 347 | { 348 | /* Too big? 
*/ 349 | if (ids[0].mid >= MDB_IDL_UM_MAX) { 350 | return -2; 351 | } 352 | ids[0].mid++; 353 | ids[ids[0].mid] = *id; 354 | return 0; 355 | } 356 | 357 | /** @} */ 358 | /** @} */ 359 | -------------------------------------------------------------------------------- /c_src/actordb_driver_nif.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTORDB_DRIVER_NIF_H 2 | #define ACTORDB_DRIVER_NIF_H 3 | #ifdef _WIN32 4 | #define snprintf _snprintf 5 | #endif 6 | #include "lmdb.h" 7 | #ifndef _TESTAPP_ 8 | #include "erl_nif.h" 9 | #endif 10 | 11 | #define MAX_ATOM_LENGTH 255 12 | #define MAX_PATHNAME 512 13 | #define PAGE_BUFF_SIZE 4300 14 | #define MAX_CONNECTIONS 8 15 | #define MAX_STATIC_SQLS 11 16 | #define MAX_PREP_SQLS 100 17 | #define MAX_ACTOR_NAME 92 18 | 19 | #include "lfqueue.h" 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "platform.h" 25 | 26 | FILE *g_log = 0; 27 | #if defined(_TESTDBG_) 28 | #ifndef _WIN32 29 | #define DBG(X, ...) fprintf(g_log,"thr=%lld: " X "\r\n",(i64)pthread_self(),##__VA_ARGS__) ;fflush(g_log); 30 | #else 31 | #define DBG(X, ...) fprintf(g_log, "thr=%lld: " X "\r",(i64)GetCurrentThreadId(),##__VA_ARGS__) ;fflush(g_log); 32 | #endif 33 | #else 34 | # define DBG(X, ...) 
35 | #endif 36 | 37 | 38 | typedef struct db_connection db_connection; 39 | typedef struct db_backup db_backup; 40 | typedef struct db_thread db_thread; 41 | typedef struct control_data control_data; 42 | typedef struct conn_resource conn_resource; 43 | typedef struct wal_file wal_file; 44 | 45 | typedef struct WalIndexHdr WalIndexHdr; 46 | typedef struct WalIterator WalIterator; 47 | typedef struct WalCkptInfo WalCkptInfo; 48 | typedef struct iterate_resource iterate_resource; 49 | typedef struct priv_data priv_data; 50 | typedef struct mdbinf mdbinf; 51 | 52 | struct mdbinf 53 | { 54 | MDB_dbi infodb; 55 | MDB_dbi logdb; 56 | MDB_dbi pagesdb; 57 | MDB_dbi actorsdb; 58 | MDB_env *env; 59 | MDB_txn *txn; 60 | MDB_cursor *cursorLog; 61 | MDB_cursor *cursorPages; 62 | MDB_cursor *cursorInfo; 63 | u32 batchCounter; 64 | u64 commitCount; 65 | char hasWritten; 66 | }; 67 | 68 | struct priv_data 69 | { 70 | int nEnvs; // number of environments 71 | int nReadThreads; 72 | int nWriteThreads; 73 | queue **wtasks; // array of queues for every write thread + control thread 74 | queue **rtasks; // every environment has nReadThreads 75 | u64 *syncNumbers; 76 | mdbinf *wmdb; 77 | 78 | #ifndef _TESTAPP_ 79 | ErlNifMutex *sockMutex; 80 | ErlNifMutex *prepMutex; 81 | ErlNifMutex **wthrMutexes; 82 | ErlNifTid *tids; // tids for every write thread + control 83 | ErlNifTid *rtids; // tids for every read thread 84 | #endif 85 | 86 | // set when sockets array changes 87 | _Atomic(u64) sockUpdate; 88 | // per write thread socket array 89 | int* sockets; 90 | int* socketTypes; 91 | 92 | // For actorsdb, when opening a new actor 93 | // do an atomic increment to get a unique index. Then send a write 94 | // to write thread for it. 
95 | _Atomic(u64) *actorIndexes; 96 | int prepSize; 97 | int prepVersions[MAX_PREP_SQLS][MAX_PREP_SQLS]; 98 | char* prepSqls[MAX_PREP_SQLS][MAX_PREP_SQLS]; 99 | char **paths; 100 | u64 dbsize; 101 | char staticSqls[MAX_STATIC_SQLS][256]; 102 | int nstaticSqls; 103 | 104 | int nPlugins; 105 | char **pluginFiles; 106 | 107 | #if !_TESTAPP_ 108 | ErlNifPid tunnelConnector; 109 | #endif 110 | u32 maxReqTime; 111 | db_thread **wthreads; 112 | db_thread **rthreads; 113 | }; 114 | 115 | struct Wal { 116 | // db_thread *thread; 117 | // db_thread *rthread; 118 | // for access to readSafeXXX values. They are set on write/scheduler thread and read 119 | // on read thread. 120 | #ifndef _TESTAPP_ 121 | ErlNifMutex* mtx; 122 | #endif 123 | // #ifndef _WIN32 124 | // pthread_t rthreadId; 125 | // #else 126 | // DWORD rthreadId; 127 | // #endif 128 | u64 index; 129 | u64 firstCompleteTerm; 130 | u64 firstCompleteEvnum; 131 | u64 lastCompleteTerm; 132 | u64 lastCompleteEvnum; 133 | u64 inProgressTerm; 134 | u64 inProgressEvnum; 135 | // This is set from lastCompleteXXXX once write is safely replicated. 136 | // Or after a rewind. Used by read thread. 137 | // -> not used atm because we don't control the sqlite cache 138 | // so we cant set which page versions are used. 139 | u64 readSafeTerm; 140 | u64 readSafeEvnum; 141 | Pgno readSafeMxPage; 142 | Pgno mxPage; 143 | u32 allPages; // mxPage + unused pages 144 | }; 145 | 146 | struct db_thread 147 | { 148 | #ifdef TRACK_TIME 149 | u8 timeBuf[1024*1024]; 150 | int timeBufPos; 151 | u8 timeTrack; 152 | #endif 153 | MDB_val *resFrames; 154 | mdbinf mdb; 155 | u8 *wBuffer; 156 | int bufSize; 157 | 158 | // For read threads. Before executing sql on a connection, copy over term/evnum upper limit. 159 | // Reads/writes can be completely asynchronous, at least from our code 160 | // we make no assumptions about sqlite. 161 | // We can't allow readSafeTerm/readSafeEvnum to change in the middle of a read. 
162 | // u64 readSafeTerm; 163 | // u64 readSafeEvnum; 164 | 165 | // Raft page replication 166 | // MAX_CONNECTIONS (8) servers to replicate write log to 167 | int sockets[MAX_CONNECTIONS]; 168 | int socket_types[MAX_CONNECTIONS]; 169 | 170 | _Atomic(u64) nReqs; 171 | db_connection *execConn; 172 | _Atomic(u8) reqRunning; 173 | #ifndef _TESTAPP_ 174 | queue *tasks; 175 | ERL_NIF_TERM *columnSpace; 176 | #endif 177 | 178 | int columnSpaceSize; 179 | u32 pagesChanged; 180 | int nThread; // Index of this thread 181 | int nEnv; // Environment index of this thread 182 | int maxvalsize; 183 | int nResFrames; 184 | u8 forceCommit; 185 | u8 isopen; 186 | u8 isreadonly; 187 | u8 finish; 188 | 189 | char staticSqls[MAX_STATIC_SQLS][256]; 190 | int nstaticSqls; 191 | 192 | #ifndef _TESTAPP_ 193 | ErlNifEnv *env; 194 | #endif 195 | }; 196 | 197 | 198 | struct db_connection 199 | { 200 | // Write thread index 201 | u8 wthreadind; 202 | // Read thread index 203 | u8 rthreadind; 204 | // On rewind/inject or wal open. 205 | // Signals that sqlite should flush cache. 
206 | u8 dirty; 207 | // Can we do checkpoint or not 208 | u8 checkpointLock; 209 | // 0 - do not replicate 210 | // > 0 - replicate to socket types that match number 211 | u8 doReplicate; 212 | u8 changed; 213 | 214 | struct Wal wal; 215 | sqlite3 *db; 216 | sqlite3_stmt **staticPrepared; 217 | sqlite3_stmt **prepared; 218 | int *prepVersions; 219 | u64 syncNum; 220 | 221 | #ifndef _TESTAPP_ 222 | // Fixed part of packet prefix 223 | char* packetPrefix; 224 | int packetPrefixSize; 225 | // Variable part of packet prefix 226 | ErlNifBinary packetVarPrefix; 227 | #endif 228 | // char wal_configured; 229 | }; 230 | 231 | struct iterate_resource 232 | { 233 | u64 evnum; 234 | u64 evterm; 235 | u32 pgnoPos; 236 | 237 | int thread; 238 | db_connection *conn; 239 | 240 | u32 mxPage; 241 | 242 | char started; 243 | char entiredb; 244 | char termMismatch; 245 | char closed; 246 | }; 247 | 248 | typedef enum 249 | { 250 | cmd_unknown = 0, 251 | cmd_open = 1, 252 | cmd_exec_script = 2, 253 | cmd_stop = 4, 254 | cmd_interrupt = 8, 255 | cmd_tcp_connect = 9, 256 | cmd_set_socket = 10, 257 | cmd_tcp_reconnect = 11, 258 | // cmd_bind_insert = 12, 259 | cmd_alltunnel_call = 13, 260 | // cmd_store_prepared = 14, 261 | cmd_checkpoint_lock = 15, 262 | cmd_iterate = 16, 263 | cmd_inject_page = 17, 264 | cmd_wal_rewind = 18, 265 | cmd_checkpoint = 20, 266 | cmd_term_store = 21, 267 | cmd_actor_info = 22, 268 | cmd_sync = 23, 269 | cmd_stmt_info = 24, 270 | cmd_file_write = 25, 271 | cmd_actorsdb_add = 26, 272 | cmd_synced = 27 273 | } command_type; 274 | 275 | typedef struct 276 | { 277 | // void *p; 278 | db_connection *conn; 279 | #ifndef _TESTAPP_ 280 | ERL_NIF_TERM ref; 281 | ErlNifPid pid; 282 | ERL_NIF_TERM arg; 283 | ERL_NIF_TERM arg1; 284 | ERL_NIF_TERM arg2; 285 | ERL_NIF_TERM arg3; 286 | ERL_NIF_TERM arg4; 287 | ERL_NIF_TERM answer; 288 | #endif 289 | // int connindex; 290 | command_type type; 291 | } db_command; 292 | 293 | 294 | #ifdef TRACK_TIME 295 | #include 296 | 297 | 
void track_time(u8 id, db_thread *thr); 298 | void track_flag(db_thread *thr, u8 flag); 299 | 300 | void track_flag(db_thread *thr, u8 flag) 301 | { 302 | thr->timeTrack = flag; 303 | } 304 | void track_time(u8 id, db_thread *thr) 305 | { 306 | if (thr->timeTrack && thr->timeBufPos+sizeof(u64) < sizeof(thr->timeBuf)) 307 | { 308 | u64 t = mach_absolute_time(); 309 | thr->timeBuf[thr->timeBufPos] = id; 310 | memcpy(thr->timeBuf+thr->timeBufPos+1, &t, sizeof(u64)); 311 | thr->timeBufPos += sizeof(u64) + 1; 312 | } 313 | } 314 | #else 315 | #define track_time(X,Y) 316 | #define track_flag(X,Y) 317 | #endif 318 | 319 | #ifndef _TESTAPP_ 320 | static ERL_NIF_TERM make_cell(ErlNifEnv *env, sqlite3_stmt *statement, unsigned int i); 321 | static ERL_NIF_TERM push_command(int thread, int readThreadNum,priv_data *pd, qitem *cmd); 322 | static ERL_NIF_TERM make_binary(ErlNifEnv *env, const void *bytes, unsigned int size); 323 | // int wal_hook(void *data,sqlite3* db,const char* nm,int npages); 324 | static qitem *command_create(int threadnum,int readThreadNum,priv_data* pd); 325 | // static ERL_NIF_TERM do_tcp_connect1(db_command *cmd, db_thread* thread, int pos, ErlNifEnv *env); 326 | static int bind_cell(ErlNifEnv *env, const ERL_NIF_TERM cell, sqlite3_stmt *stmt, unsigned int i); 327 | void errLogCallback(void *pArg, int iErrCode, const char *zMsg); 328 | void fail_send(int i); 329 | #endif 330 | 331 | 332 | int reopen_db(db_connection *conn, db_thread *thread); 333 | void close_prepared(db_connection *conn); 334 | // SQLITE_API int sqlite3_wal_data(sqlite3 *db,void *pArg); 335 | int checkpoint_continue(db_thread *thread); 336 | int read_wal_hdr(sqlite3_vfs *vfs, sqlite3_file *pWalFd, wal_file **outWalFile); 337 | int read_thread_wal(db_thread*); 338 | 339 | 340 | 341 | #endif 342 | -------------------------------------------------------------------------------- /c_src/noerl.c: -------------------------------------------------------------------------------- 1 | // This 
Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | // #define _TESTDBG_ 1 5 | #ifdef __linux__ 6 | #define _GNU_SOURCE 1 7 | #include 8 | #include 9 | #include 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #ifndef _WIN32 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #else 28 | #include 29 | #include 30 | #endif 31 | 32 | // Directly include sqlite3.c 33 | // This way we are sure the included version of sqlite3 is actually used. 34 | // If we were to just include "sqlite3.h" OSX would actually use /usr/lib/libsqlite3.dylib 35 | #define SQLITE_API static 36 | #define SQLITE_EXTERN static 37 | #include "sqlite3.c" 38 | 39 | 40 | #include "actordb_driver_nif.h" 41 | #include "lz4.h" 42 | 43 | static __thread db_thread *g_tsd_thread; 44 | static __thread db_connection *g_tsd_conn; 45 | static __thread u64 g_tsd_cursync; 46 | static __thread mdbinf *g_tsd_wmdb; 47 | static priv_data *g_pd; 48 | #define enif_tsd_get pthread_getspecific 49 | 50 | static void lock_wtxn(int env); 51 | 52 | // wal.c code has been taken out of sqlite3.c and placed in wal.c file. 53 | // Every wal interface function is changed, but the wal-index code remains unchanged. 
54 | #include "wal.c" 55 | #include "nullvfs.c" 56 | 57 | #define RTHREADS 4 58 | #define NCONS 100 59 | static db_connection *g_cons; 60 | 61 | static void lock_wtxn(int nEnv) 62 | { 63 | g_tsd_wmdb = &g_pd->wmdb[nEnv]; 64 | 65 | if (g_tsd_wmdb->txn == NULL) 66 | { 67 | if (open_txn(g_tsd_wmdb, 0) == NULL) 68 | return; 69 | } 70 | g_tsd_cursync = g_pd->syncNumbers[nEnv]; 71 | } 72 | 73 | static void unlock_write_txn(int nEnv, char syncForce, char *commit) 74 | { 75 | int i; 76 | 77 | if (!g_tsd_wmdb) 78 | return; 79 | 80 | ++g_tsd_wmdb->batchCounter; 81 | if (*commit || syncForce) 82 | { 83 | if (mdb_txn_commit(g_tsd_wmdb->txn) != MDB_SUCCESS) 84 | mdb_txn_abort(g_tsd_wmdb->txn); 85 | g_tsd_wmdb->txn = NULL; 86 | g_tsd_wmdb->batchCounter = 0; 87 | 88 | if (syncForce) 89 | mdb_env_sync(g_tsd_wmdb->env,1); 90 | 91 | if (syncForce) 92 | ++g_pd->syncNumbers[nEnv]; 93 | *commit = 1; 94 | } 95 | // else 96 | // DBG("UNLOCK %u",g_tsd_wmdb->usageCount); 97 | g_tsd_cursync = g_pd->syncNumbers[nEnv]; 98 | g_tsd_wmdb = NULL; 99 | } 100 | 101 | static void *perform(void *arg) 102 | { 103 | db_thread *thr = (db_thread*)arg; 104 | int i,rc; 105 | mdbinf* mdb = &thr->mdb; 106 | 107 | srand((u32)pthread_self()); 108 | open_txn(mdb, MDB_RDONLY); 109 | thr->resFrames = alloca((SQLITE_DEFAULT_PAGE_SIZE/thr->maxvalsize + 1)*sizeof(MDB_val)); 110 | 111 | for (i = 0; i < 1000*100; i++) 112 | { 113 | int j = rand() % NCONS; 114 | 115 | if (i % 1000 == 0) 116 | printf("r %lld %d\n",(i64)pthread_self(),i); 117 | 118 | if (sqlite3_mutex_try(g_cons[j].db->mutex) != 0) 119 | continue; 120 | 121 | g_tsd_conn = &g_cons[j]; 122 | 123 | rc = sqlite3_exec(g_cons[j].db,"SELECT max(id) FROM tab;",NULL,NULL,NULL); 124 | if (rc != SQLITE_OK) 125 | { 126 | printf("Error select"); 127 | break; 128 | } 129 | 130 | sqlite3_mutex_leave(g_cons[j].db->mutex); 131 | 132 | mdb_txn_reset(thr->mdb.txn); 133 | rc = mdb_txn_renew(thr->mdb.txn); 134 | if (rc != MDB_SUCCESS) 135 | break; 136 | rc = 
mdb_cursor_renew(mdb->txn, mdb->cursorLog); 137 | if (rc != MDB_SUCCESS) 138 | break; 139 | rc = mdb_cursor_renew(mdb->txn, mdb->cursorPages); 140 | if (rc != MDB_SUCCESS) 141 | break; 142 | rc = mdb_cursor_renew(mdb->txn, mdb->cursorInfo); 143 | if (rc != MDB_SUCCESS) 144 | break; 145 | } 146 | mdb_cursor_close(mdb->cursorLog); 147 | mdb_cursor_close(mdb->cursorPages); 148 | mdb_cursor_close(mdb->cursorInfo); 149 | mdb_txn_abort(mdb->txn); 150 | return NULL; 151 | } 152 | 153 | 154 | int main(int argc, const char* argv[]) 155 | { 156 | g_log = stdout; 157 | db_thread thr; 158 | db_thread threads[RTHREADS]; 159 | pthread_t tids[RTHREADS]; 160 | priv_data pd; 161 | mdbinf* mdb = &thr.mdb; 162 | int i, rc; 163 | db_connection *cons; 164 | g_pd = &pd; 165 | char commit = 1; 166 | MDB_env *menv = NULL; 167 | char *lmpath = "lmdb"; 168 | MDB_txn *txn; 169 | MDB_val key = {1,(void*)"?"}, data = {0,NULL}; 170 | MDB_envinfo stat; 171 | 172 | sqlite3_initialize(); 173 | sqlite3_vfs_register(sqlite3_nullvfs(), 1); 174 | 175 | unlink(lmpath); 176 | 177 | memset(threads, 0, sizeof(threads)); 178 | memset(&thr, 0, sizeof(db_thread)); 179 | memset(&pd, 0, sizeof(priv_data)); 180 | 181 | pd.wmdb = calloc(1,sizeof(mdbinf)); 182 | pd.nEnvs = 1; 183 | pd.nReadThreads = RTHREADS; 184 | pd.nWriteThreads = 1; 185 | pd.syncNumbers = calloc(1,sizeof(u64)); 186 | pd.actorIndexes = calloc(1,sizeof(atomic_llong)); 187 | atomic_init(pd.actorIndexes,0); 188 | g_cons = cons = calloc(NCONS, sizeof(db_connection)); 189 | g_tsd_cursync = 0; 190 | g_tsd_conn = NULL; 191 | g_tsd_wmdb = NULL; 192 | g_tsd_thread = &thr; 193 | 194 | if (mdb_env_create(&menv) != MDB_SUCCESS) 195 | return -1; 196 | if (mdb_env_set_maxdbs(menv,5) != MDB_SUCCESS) 197 | return -1; 198 | if (mdb_env_set_mapsize(menv,1024*1024*1024) != MDB_SUCCESS) 199 | return -1; 200 | // Syncs are handled from erlang. 
201 | if (mdb_env_open(menv, lmpath, MDB_NOSUBDIR|MDB_NOTLS|MDB_NOSYNC, 0664) != MDB_SUCCESS) //MDB_NOSYNC 202 | return -1; 203 | if (mdb_txn_begin(menv, NULL, 0, &txn) != MDB_SUCCESS) 204 | return -1; 205 | if (mdb_dbi_open(txn, "info", MDB_INTEGERKEY | MDB_CREATE, &pd.wmdb[0].infodb) != MDB_SUCCESS) 206 | return -1; 207 | if (mdb_dbi_open(txn, "actors", MDB_CREATE, &pd.wmdb[0].actorsdb) != MDB_SUCCESS) 208 | return -1; 209 | if (mdb_dbi_open(txn, "log", MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, 210 | &pd.wmdb[0].logdb) != MDB_SUCCESS) 211 | return -1; 212 | if (mdb_dbi_open(txn, "pages", MDB_CREATE | MDB_DUPSORT, &pd.wmdb[0].pagesdb) != MDB_SUCCESS) 213 | return -1; 214 | if (mdb_txn_commit(txn) != MDB_SUCCESS) 215 | return -1; 216 | 217 | pd.wmdb[0].env = menv; 218 | thr.nEnv = 0; 219 | thr.isreadonly = 0; 220 | thr.mdb.env = menv; 221 | thr.mdb.infodb = pd.wmdb[0].infodb; 222 | thr.mdb.actorsdb = pd.wmdb[0].actorsdb; 223 | thr.mdb.logdb = pd.wmdb[0].logdb; 224 | thr.mdb.pagesdb = pd.wmdb[0].pagesdb; 225 | thr.maxvalsize = mdb_env_get_maxkeysize(mdb->env); 226 | thr.resFrames = alloca((SQLITE_DEFAULT_PAGE_SIZE/thr.maxvalsize + 1)*sizeof(MDB_val)); 227 | open_txn(&thr.mdb, MDB_RDONLY); 228 | 229 | for (i = 0; i < NCONS; i++) 230 | { 231 | char filename[256]; 232 | char commit = 1; 233 | g_tsd_conn = &cons[i]; 234 | sprintf(filename, "ac%d.db", i); 235 | 236 | thr.pagesChanged = 0; 237 | 238 | rc = sqlite3_open(filename,&(cons[i].db)); 239 | if(rc != SQLITE_OK) 240 | { 241 | DBG("Unable to open db"); 242 | break; 243 | } 244 | rc = sqlite3_exec(cons[i].db,"PRAGMA synchronous=0;PRAGMA journal_mode=wal;",NULL,NULL,NULL); 245 | if (rc != SQLITE_OK) 246 | { 247 | DBG("unable to open wal"); 248 | break; 249 | } 250 | cons[i].wal.inProgressTerm = 1; 251 | cons[i].wal.inProgressEvnum = 1; 252 | rc = sqlite3_exec(cons[i].db,"CREATE TABLE tab (id INTEGER PRIMARY KEY, txt TEXT);" 253 | "insert into tab values (1,'aaaa');",NULL,NULL,NULL); 254 | if (rc != 
SQLITE_OK) 255 | { 256 | DBG("Cant create table"); 257 | break; 258 | } 259 | unlock_write_txn(thr.nEnv, 0, &commit); 260 | 261 | mdb_txn_reset(thr.mdb.txn); 262 | 263 | rc = mdb_txn_renew(thr.mdb.txn); 264 | if (rc != MDB_SUCCESS) 265 | break; 266 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorLog); 267 | if (rc != MDB_SUCCESS) 268 | break; 269 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorPages); 270 | if (rc != MDB_SUCCESS) 271 | break; 272 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorInfo); 273 | if (rc != MDB_SUCCESS) 274 | break; 275 | } 276 | // mdb_cursor_close(thr.mdb.cursorLog); 277 | // mdb_cursor_close(thr.mdb.cursorPages); 278 | // mdb_cursor_close(thr.mdb.cursorInfo); 279 | // mdb_txn_abort(thr.mdb.txn); 280 | 281 | 282 | for (i = 0; i < RTHREADS; i++) 283 | { 284 | threads[i].nEnv = 0; 285 | threads[i].isreadonly = 0; 286 | threads[i].mdb.env = menv; 287 | threads[i].mdb.infodb = pd.wmdb[0].infodb; 288 | threads[i].mdb.actorsdb = pd.wmdb[0].actorsdb; 289 | threads[i].mdb.logdb = pd.wmdb[0].logdb; 290 | threads[i].mdb.pagesdb = pd.wmdb[0].pagesdb; 291 | threads[i].maxvalsize = mdb_env_get_maxkeysize(mdb->env); 292 | pthread_create(&tids[i], NULL, perform, (void *)&threads[i]); 293 | } 294 | 295 | srand((u32)pthread_self() + time(NULL)); 296 | for (i = 0; i < 1000*200; i++) 297 | { 298 | char commit = 1; 299 | int j = rand() % NCONS; 300 | db_connection *con = &g_cons[j]; 301 | char str[100]; 302 | if (sqlite3_mutex_try(con->db->mutex) != 0) 303 | { 304 | i--; 305 | continue; 306 | } 307 | 308 | if (i % 1000 == 0) 309 | printf("w %d\n",i); 310 | g_tsd_conn = con; 311 | lock_wtxn(thr.nEnv); 312 | 313 | thr.pagesChanged = 0; 314 | 315 | if (con->wal.firstCompleteEvnum+10 < con->wal.lastCompleteEvnum) 316 | { 317 | // printf("CHECKPOINT? 
%llu %llu\n",con->wal.firstCompleteEvnum,con->wal.lastCompleteEvnum); 318 | if (checkpoint(&con->wal, con->wal.lastCompleteEvnum-10) != SQLITE_OK) 319 | { 320 | printf("Checkpoint failed\n"); 321 | break; 322 | } 323 | } 324 | con->wal.inProgressTerm = 1; 325 | con->wal.inProgressEvnum = con->wal.lastCompleteEvnum+1; 326 | 327 | sprintf(str,"INSERT INTO tab VALUES (%d,'VALUE VALUE13456');", i); 328 | sqlite3_exec(con->db,str,NULL,NULL,NULL); 329 | 330 | sqlite3_mutex_leave(con->db->mutex); 331 | 332 | unlock_write_txn(thr.nEnv, 0, &commit); 333 | 334 | mdb_txn_reset(thr.mdb.txn); 335 | rc = mdb_txn_renew(thr.mdb.txn); 336 | if (rc != MDB_SUCCESS) 337 | break; 338 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorLog); 339 | if (rc != MDB_SUCCESS) 340 | break; 341 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorPages); 342 | if (rc != MDB_SUCCESS) 343 | break; 344 | rc = mdb_cursor_renew(thr.mdb.txn, mdb->cursorInfo); 345 | if (rc != MDB_SUCCESS) 346 | break; 347 | } 348 | 349 | unlock_write_txn(thr.nEnv, 1, &commit); 350 | 351 | 352 | for (i = 0; i < RTHREADS; i++) 353 | pthread_join(tids[i],NULL); 354 | 355 | return 1; 356 | } 357 | -------------------------------------------------------------------------------- /src/actordb_driver.erl: -------------------------------------------------------------------------------- 1 | % This Source Code Form is subject to the terms of the Mozilla Public 2 | % License, v. 2.0. If a copy of the MPL was not distributed with this 3 | % file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | -module(actordb_driver). 5 | -define(DELAY,5). 
% Public API. Apart from close/1 (a no-op), every exported function forwards,
% directly or through another arity, to the NIF module actordb_driver_nif.
-export([init/1,noop/1,
		open/1,open/2,open/3,open/4,
		exec_script/2,exec_script/3,exec_script/6,exec_script/4,exec_script/7,
		exec_read/2,exec_read/3,exec_read/4,
		exec_read_async/2,exec_read_async/3,
		exec_script_async/2,exec_script_async/3,exec_script_async/5,exec_script_async/6,
		store_prepared_table/2,
		close/1,inject_page/3,
		parse_helper/1,parse_helper/2, iterate_db/2,iterate_db/3,page_size/0,
		replication_done/1,
		get_counter/1, counter_inc/2,
		lz4_compress/1,lz4_decompress/2,lz4_decompress/3,
		iterate_close/1, fsync_num/1,fsync/1,fsync/0,
		set_tunnel_connector/0, set_thread_fd/4,
		replicate_opts/2,replicate_opts/3, all_tunnel_call/1,all_tunnel_call/2,checkpoint_lock/2,
		checkpoint/2, term_store/3,term_store/4, actor_info/2, wal_rewind/2,wal_rewind/3,
		stmt_info/2]).

% Initialise the driver. Info must be a map; it is passed unchanged to the NIF.
% #{paths => {Path1,Path2,...}, staticsqls => {StaticSql1,StaticSql2,...},
%   dbsize => MaxDbSize, rthreads => NumReadThreads, wthreads => NumWriteThreads}
init(Info) when is_map(Info) ->
	actordb_driver_nif:init(Info).

% Reads counter Index by asking the NIF to increment it by 0
% (presumably the NIF returns the resulting value - confirm in actordb_driver_nif).
get_counter(Index) ->
	actordb_driver_nif:counter_inc(Index, 0).
% Adds Val to counter Index via the NIF and returns whatever the NIF returns.
counter_inc(Index,Val) ->
	actordb_driver_nif:counter_inc(Index, Val).

% Direct pass-throughs to the NIF; argument meaning is defined by the NIF.
set_tunnel_connector() ->
	actordb_driver_nif:set_tunnel_connector().
set_thread_fd(Thread,Fd,Pos,Type) ->
	actordb_driver_nif:set_thread_fd(Thread,Fd,Pos,Type).

% open/1: open Filename on thread 0 in wal mode.
open(Filename) ->
	open(Filename,0,wal).
% open/2: open Filename on thread ThreadNumber in wal mode.
open(Filename,ThreadNumber) ->
	open(Filename,ThreadNumber,wal).
% open/3: open Filename on thread ThreadNumber.
%  - Third argument wal | blob: open in that mode.
%    Returns {ok, Db} or {error, Msg}.
%  - Third argument a binary or list: treated as SQL to run on open and
%    forwarded to open/4 in wal mode.
% Db is the handle {actordb_driver, Ref, Connection} expected by the rest of this API.
% When the NIF answers 'again', the call is retried after ?DELAY ms.
open(Filename,ThreadNumber,Mode) when Mode == wal; Mode == blob ->
	Ref = make_ref(),
	case actordb_driver_nif:open(Ref, self(), Filename,ThreadNumber,Mode) of
		again ->
			timer:sleep(?DELAY),
			open(Filename,ThreadNumber,Mode);
		ok ->
			case receive_answer(Ref) of
				{ok,Connection} ->
					{ok, {actordb_driver, make_ref(),Connection}};
				{error, _Msg}=Error ->
					Error
			end
	end;
open(Filename,ThreadNumber,Sql) when is_binary(Sql); is_list(Sql) ->
	open(Filename,ThreadNumber,Sql,wal).

% open/4: open Filename on thread ThreadNumber in Mode and execute Sql as part of the open.
% Returns {ok, Db, Res} when the driver also delivers a result for Sql,
% {ok, Db} when it does not, or {error, Msg}.
% When the NIF answers 'again', the call is retried after ?DELAY ms.
open(Filename,ThreadNumber,Sql,Mode) ->
	Ref = make_ref(),
	case actordb_driver_nif:open(Ref, self(), Filename,ThreadNumber,Mode,Sql) of
		again ->
			% Retry like every other wrapper in this module. Without the recursive call
			% this branch returned the 'ok' of timer:sleep/1 and no connection was opened.
			timer:sleep(?DELAY),
			open(Filename,ThreadNumber,Sql,Mode);
		ok ->
			case receive_answer(Ref) of
				{ok,Connection,Res} ->
					{ok, {actordb_driver, make_ref(),Connection},Res};
				{ok,Connection} ->
					{ok, {actordb_driver, make_ref(),Connection}};
				{error, _Msg}=Error ->
					Error
			end
	end.

% Ask the driver for information about actor Name on thread Thread.
% Returns the answer message sent back for the request; retries on 'again'.
actor_info(Name,Thread) ->
	Ref = make_ref(),
	case actordb_driver_nif:actor_info(Ref,self(),Name,Thread) of
		again ->
			timer:sleep(?DELAY),
			actor_info(Name, Thread);
		ok ->
			receive_answer(Ref)
	end.

% term_store/3: store CurrentTerm and VotedFor through an open connection.
% Returns ok; retries on 'again'.
term_store({actordb_driver, _Ref, Connection},CurrentTerm,VotedFor) ->
	case actordb_driver_nif:term_store(Connection, CurrentTerm, VotedFor) of
		ok ->
			ok;
		again ->
			timer:sleep(?DELAY),
			term_store({actordb_driver, _Ref, Connection},CurrentTerm,VotedFor)
	end.
% term_store/4: same as term_store/3, but addressed by actor Name and Thread
% instead of an open connection.
term_store(Name, CurrentTerm, VotedFor, Thread) ->
	case actordb_driver_nif:term_store(Name, CurrentTerm, VotedFor, Thread) of
		ok ->
			ok;
		again ->
			timer:sleep(?DELAY),
			term_store(Name, CurrentTerm, VotedFor, Thread)
	end.

close({actordb_driver, _Ref, _Connection}) ->
	% Noop. Rely on GC. This is to avoid double closing.
	ok.

% Hand a table of prepared statements to the NIF.
% Indexes and Sqls must be tuples of equal size (enforced by the guard).
store_prepared_table(Indexes,Sqls) when is_tuple(Indexes), is_tuple(Sqls), tuple_size(Indexes) == tuple_size(Sqls) ->
	actordb_driver_nif:store_prepared_table(Indexes,Sqls).

% Request a checkpoint up to Evnum on the connection and wait for the answer.
% Retries after ?DELAY ms while the NIF answers 'again'.
checkpoint({actordb_driver, _Ref, Connection}, Evnum) ->
	Ref = make_ref(),
	case actordb_driver_nif:checkpoint(Connection,Ref,self(),Evnum) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			checkpoint({actordb_driver, _Ref, Connection}, Evnum)
	end.

% Ask the driver for information about statement Sql and wait for the answer.
% Retries after ?DELAY ms while the NIF answers 'again'.
stmt_info({actordb_driver, _Ref, Connection}, Sql) ->
	Ref = make_ref(),
	case actordb_driver_nif:stmt_info(Connection,Ref,self(),Sql) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			stmt_info({actordb_driver, _Ref, Connection}, Sql)
	end.

% parse_helper/1 is parse_helper/2 with Offset 0.
parse_helper(Bin) ->
	parse_helper(Bin,0).
parse_helper(Bin,Offset) ->
	actordb_driver_nif:parse_helper(Bin,Offset).

% replicate_opts/2 is replicate_opts/3 with Type 1.
replicate_opts(Con,PacketPrefix) ->
	replicate_opts(Con,PacketPrefix,1).
% Crashes with badmatch if the NIF returns anything other than ok.
replicate_opts({actordb_driver, _Ref, Connection},PacketPrefix,Type) ->
	ok = actordb_driver_nif:replicate_opts(Connection,PacketPrefix,Type).

% Crashes with badmatch if the NIF returns anything other than ok.
replication_done({actordb_driver, _Ref, Connection}) ->
	ok = actordb_driver_nif:replication_done(Connection).

% tcp_connect(Ip,Port,ConnectStr,ConnNumber) ->
% 	Ref = make_ref(),
% 	actordb_driver_nif:tcp_connect(Ref,self(),Ip,Port,ConnectStr,ConnNumber),
% 	receive_answer(Ref).
% tcp_connect_async(Ip,Port,ConnectStr,ConnNumber) ->
% 	Ref = make_ref(),
% 	actordb_driver_nif:tcp_connect(Ref,self(),Ip,Port,ConnectStr,ConnNumber),
% 	Ref.
% tcp_connect_async(Ip,Port,ConnectStr,ConnNumber,Type) ->
% 	Ref = make_ref(),
% 	actordb_driver_nif:tcp_connect(Ref,self(),Ip,Port,ConnectStr,ConnNumber,Type),
% 	Ref.
% tcp_reconnect() ->
% 	actordb_driver_nif:tcp_reconnect().

% Send Bin through the NIF's all_tunnel_call and wait for the reply.
% A busy driver ('again') is retried after ?DELAY ms.
all_tunnel_call(Bin) ->
	Tag = make_ref(),
	case actordb_driver_nif:all_tunnel_call(Tag,self(),Bin) of
		again ->
			timer:sleep(?DELAY),
			all_tunnel_call(Bin);
		ok ->
			receive_answer(Tag)
	end.
% Same as all_tunnel_call/1 with the payload split into Head and Body.
all_tunnel_call(Head,Body) ->
	Tag = make_ref(),
	case actordb_driver_nif:all_tunnel_call(Tag,self(),Head,Body) of
		again ->
			timer:sleep(?DELAY),
			all_tunnel_call(Head,Body);
		ok ->
			receive_answer(Tag)
	end.

% LZ4 helpers; all three are direct calls into the NIF.
lz4_compress(Data) ->
	actordb_driver_nif:lz4_compress(Data).
lz4_decompress(Data,SizeOrig) ->
	actordb_driver_nif:lz4_decompress(Data,SizeOrig).
lz4_decompress(Data,SizeOrig,SizeIn) ->
	actordb_driver_nif:lz4_decompress(Data,SizeOrig,SizeIn).

% wal_pages({actordb_driver, _Ref, Connection}) ->
%     actordb_driver_nif:wal_pages(Connection).

% Rewind the connection's WAL to Evnum and wait for the reply.
wal_rewind({actordb_driver, _Ref, Connection} = Db,Evnum) ->
	Tag = make_ref(),
	case actordb_driver_nif:wal_rewind(Connection, Tag, self(),Evnum) of
		again ->
			timer:sleep(?DELAY),
			wal_rewind(Db,Evnum);
		ok ->
			receive_answer(Tag)
	end.
% Rewind to event number 0, passing ReplaceSql (binary or list) along to the NIF.
wal_rewind({actordb_driver, _Ref, Connection} = Db, 0, ReplaceSql) when is_binary(ReplaceSql); is_list(ReplaceSql) ->
	Tag = make_ref(),
	case actordb_driver_nif:wal_rewind(Connection, Tag, self(),0, ReplaceSql) of
		again ->
			timer:sleep(?DELAY),
			wal_rewind(Db, 0, ReplaceSql);
		ok ->
			receive_answer(Tag)
	end.

% Direct call into the NIF for the given connection.
fsync_num({actordb_driver, _Ref, Connection}) ->
	actordb_driver_nif:fsync_num(Connection).
% Global fsync. Prints "again" and retries after ?DELAY ms while the driver is busy.
fsync() ->
	case actordb_driver_nif:fsync() of
		ok ->
			ok;
		again ->
			io:format("again~n"),
			timer:sleep(?DELAY),
			fsync()
	end.
% fsync/1: request an fsync for one connection. Retries on 'again'.
% A fresh Ref is handed to the NIF, but the reply for it is not awaited here
% (the receive_answer call is commented out), so 'ok' only means the request was accepted.
fsync({actordb_driver, _Ref, Connection} = C) ->
	Ref = make_ref(),
	case actordb_driver_nif:fsync(Connection, Ref, self()) of
		again ->
			timer:sleep(?DELAY),
			fsync(C);
		ok ->
			% receive_answer(Ref)
			ok
	end.

% Close an iterator handle {iter,Iter}. Crashes with badmatch unless the NIF returns ok.
iterate_close({iter,Iter}) ->
	ok = actordb_driver_nif:iterate_close(Iter).

% iterate_db/2: continue an iteration using an existing {iter,Iter} handle and wait for the answer.
% Retries after ?DELAY ms while the NIF answers 'again'.
iterate_db({actordb_driver, _Ref, Connection},{iter,Iter}) ->
	Ref = make_ref(),
	case actordb_driver_nif:iterate_db(Connection, Ref, self(),Iter) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			iterate_db({actordb_driver, _Ref, Connection},{iter,Iter})
	end.
% iterate_db/3: start an iteration from Evterm/Evnum (Evnum must be an integer) and wait for the answer.
iterate_db({actordb_driver, _Ref, Connection},Evterm,Evnum) when is_integer(Evnum) ->
	Ref = make_ref(),
	case actordb_driver_nif:iterate_db(Connection, Ref, self(), Evterm,Evnum) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			iterate_db({actordb_driver, _Ref, Connection},Evterm,Evnum)
	end.

% Pass page data Bin with header Head to the NIF for this connection and wait for the answer.
% Retries after ?DELAY ms while the NIF answers 'again'.
inject_page({actordb_driver, _Ref, Connection},Bin,Head) ->
	Ref = make_ref(),
	case actordb_driver_nif:inject_page(Connection, Ref, self(),Bin,Head) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			inject_page({actordb_driver, _Ref, Connection},Bin,Head)
	end.
% inject_page({actordb_driver, _Ref, Connection},Bin,Head) ->
% 	Ref = make_ref(),
% 	ok = actordb_driver_nif:inject_page(Connection, Ref, self(),Bin,Head),
% 	receive_answer(Ref).

% Direct call into the NIF.
page_size() ->
	actordb_driver_nif:page_size().

% Send a no-op request through the connection and wait for the answer.
% Retries after ?DELAY ms while the NIF answers 'again'.
noop({actordb_driver, _Ref, Connection}) ->
	Ref = make_ref(),
	case actordb_driver_nif:noop(Connection, Ref, self()) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			noop({actordb_driver, _Ref, Connection})
	end.

% exec_read/2 is exec_read/3 with Timeout = infinity.
exec_read(Sql,Db) ->
	exec_read(Sql,Db,infinity).
% exec_read/3 has two shapes, told apart by where the connection tuple sits:
%   exec_read(Sql, Db, Timeout) - run Sql and wait for the answer.
%   exec_read(Sql, Recs, Db)    - run Sql with parameter records Recs (forwards to exec_read/4).
% Timeout is accepted but not used for waiting: receive_answer/1 has no 'after' clause.
% Both retry after ?DELAY ms while the NIF answers 'again'.
exec_read(Sql,{actordb_driver, _Ref, Connection},Timeout) ->
	Ref = make_ref(),
	case actordb_driver_nif:exec_read(Connection, Ref, self(), Sql) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			exec_read(Sql,{actordb_driver, _Ref, Connection},Timeout)
	end;
exec_read(Sql,Recs,{actordb_driver, _Ref, _Connection} = Db) ->
	exec_read(Sql,Recs,Db,infinity).
% exec_read/4: run Sql with Recs on the connection and wait for the answer. Timeout is unused.
exec_read(Sql,Recs,{actordb_driver, _Ref, Connection},Timeout) ->
	Ref = make_ref(),
	case actordb_driver_nif:exec_read(Connection, Ref, self(), Sql, Recs) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			exec_read(Sql,Recs,{actordb_driver, _Ref, Connection},Timeout)
	end.

% exec_script/2: run Sql with Timeout infinity, Term 0, Index 0 and an empty AppendParam.
exec_script(Sql, Db) ->
	exec_script(Sql,Db,infinity,0,0,<<>>).

% exec_script/3 has two shapes, selected by which argument is the connection tuple:
%   exec_script(Sql, Recs, Db)    - third argument is the connection.
%   exec_script(Sql, Db, Timeout) - second argument is the connection.
% The first clause's guard fails (rather than crashing) when its third argument is not a tuple,
% so the second clause is then tried.
exec_script(Sql,Recs, Db) when element(1,Db) == actordb_driver ->
	exec_script(Sql,Recs,Db,infinity,0,0,<<>>);
exec_script(Sql, Db, Timeout) when element(1,Db) == actordb_driver ->
	exec_script(Sql,Db,Timeout,0,0,<<>>).

% exec_script/4: Sql with Recs and an integer Timeout. Note the guard rejects 'infinity' here.
exec_script(Sql, Recs, Db, Timeout) when is_integer(Timeout), element(1,Db) == actordb_driver ->
	exec_script(Sql,Recs,Db,Timeout,0,0,<<>>).

% exec_script/6: run Sql, passing Term, Index and AppendParam to the NIF, and wait for the answer.
% Timeout is carried through retries but not used for waiting.
exec_script(Sql, {actordb_driver, _Ref, Connection},Timeout,Term,Index,AppendParam) ->
	Ref = make_ref(),
	case actordb_driver_nif:exec_script(Connection, Ref, self(), Sql,Term,Index,AppendParam) of
		ok ->
			receive_answer(Ref);
		again ->
			timer:sleep(?DELAY),
			exec_script(Sql, {actordb_driver, _Ref, Connection},Timeout,Term,Index,AppendParam)
	end.
% exec_script/7: like exec_script/6 but also passes parameter records Recs to the NIF.
% Waits for the reply; a busy driver ('again') is retried after ?DELAY ms.
% Timeout is only carried along, it does not bound the wait.
exec_script(Sql, Recs, {actordb_driver, _Ref, Connection} = Db,Timeout,Term,Index,AppendParam) ->
	Tag = make_ref(),
	case actordb_driver_nif:exec_script(Connection, Tag, self(), Sql,Term,Index,AppendParam,Recs) of
		again ->
			timer:sleep(?DELAY),
			exec_script(Sql, Recs, Db,Timeout,Term,Index,AppendParam);
		ok ->
			receive_answer(Tag)
	end.


% Asynchronous read: returns the request reference as soon as the NIF accepts the request.
% The caller receives the {Ref, Response} message itself.
exec_read_async(Sql,{actordb_driver, _Ref, Connection} = Db) ->
	Tag = make_ref(),
	case actordb_driver_nif:exec_read(Connection, Tag, self(), Sql) of
		again ->
			timer:sleep(?DELAY),
			exec_read_async(Sql,Db);
		ok ->
			Tag
	end.
% Asynchronous read with parameter records Recs.
exec_read_async(Sql,Recs,{actordb_driver, _Ref, Connection} = Db) ->
	Tag = make_ref(),
	case actordb_driver_nif:exec_read(Connection, Tag, self(), Sql, Recs) of
		again ->
			timer:sleep(?DELAY),
			exec_read_async(Sql,Recs,Db);
		ok ->
			Tag
	end.

% Convenience arities: Term 0, Index 0 and an empty AppendParam.
exec_script_async(Sql,Recs, Db) when element(1,Db) == actordb_driver ->
	exec_script_async(Sql,Recs,Db,0,0,<<>>).
exec_script_async(Sql, Db) when element(1,Db) == actordb_driver ->
	exec_script_async(Sql,Db,0,0,<<>>).

% Asynchronous script execution: returns the request reference once the NIF accepts the request.
exec_script_async(Sql, {actordb_driver, _Ref, Connection} = Db,Term,Index,AppendParam) ->
	Tag = make_ref(),
	case actordb_driver_nif:exec_script(Connection, Tag, self(), Sql,Term,Index,AppendParam) of
		again ->
			timer:sleep(?DELAY),
			exec_script_async(Sql, Db,Term,Index,AppendParam);
		ok ->
			Tag
	end.
% exec_script_async/6: asynchronous script execution with parameter records Recs.
% Returns the request reference once the NIF accepts the request; the caller
% receives the {Ref, Response} message itself. Retries after ?DELAY ms on 'again'.
exec_script_async(Sql, Recs, {actordb_driver, _Ref, Connection},Term,Index,AppendParam) ->
	Ref = make_ref(),
	case actordb_driver_nif:exec_script(Connection, Ref, self(), Sql,Term,Index,AppendParam,Recs) of
		ok ->
			Ref;
		again ->
			timer:sleep(?DELAY),
			exec_script_async(Sql, Recs, {actordb_driver, _Ref, Connection},Term,Index,AppendParam)
	end.

% Set or clear the checkpoint lock on a connection.
% Lock may be true, false, 1 or 0; it is normalised to the integer L (1 or 0) passed to the NIF.
% Any other value fails with a case_clause error.
% Returns ok once the NIF accepts the request; the reply for Ref is not awaited here.
checkpoint_lock({actordb_driver, _Ref, Connection},Lock) ->
	case Lock of
		true ->
			L = 1;
		false ->
			L = 0;
		% These two patterns both match the integer and bind it to L.
		0 = L ->
			ok;
		1 = L ->
			ok
	end,
	Ref = make_ref(),
	case actordb_driver_nif:checkpoint_lock(Connection,Ref,self(),L) of
		ok ->
			ok;
		again ->
			timer:sleep(?DELAY),
			checkpoint_lock({actordb_driver, _Ref, Connection},Lock)
	end.

% backup_init({actordb_driver, _, Dest},{actordb_driver, _, Src}) ->
%     Ref = make_ref(),
%     ok = actordb_driver_nif:backup_init(Dest,Src,Ref,self()),
%     case receive_answer(Ref) of
%         {ok,B} ->
%             {ok,{backup,make_ref(),B}};
%         error ->
%             error
%     end.

% backup_step({backup, _Ref, B},N) ->
%     Ref = make_ref(),
%     ok = actordb_driver_nif:backup_step(B,Ref,self(),N),
%     receive_answer(Ref).

% backup_finish({backup, _Ref, B}) ->
%     Ref = make_ref(),
%     ok = actordb_driver_nif:backup_finish(B,Ref,self()),
%     receive_answer(Ref).

% backup_pages({backup, _Ref, B}) ->
%     actordb_driver_nif:backup_pages(B).


% Block until a {Ref, Resp} message for this request arrives and return Resp.
% There is no timeout: if no such message is ever sent, the caller waits forever.
receive_answer(Ref) ->
	receive
		{Ref, Resp} -> Resp
	end.
% receive_answer(Ref,Connection,Timeout) ->
% 	receive
% 		{Ref,Resp} ->
% 			Resp
% 	after Timeout ->
% 		ok = actordb_driver_nif:interrupt_query(Connection),
% 		receive
% 			{Ref,Resp} ->
% 				Resp
% 		end
% 	end.
432 | -------------------------------------------------------------------------------- /c_src/nullvfs.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** 2010 April 7 3 | ** 4 | ** The author disclaims copyright to this source code. In place of 5 | ** a legal notice, here is a blessing: 6 | ** 7 | ** May you do good and not evil. 8 | ** May you find forgiveness for yourself and forgive others. 9 | ** May you share freely, never taking more than you give. 10 | ** 11 | ************************************************************************* 12 | 13 | ** Null VFS. Because we want all data within MDB, we implement a custom VFS that does nothing. 14 | ** The only thing that is left is the base sqlite file at pgno 1. 15 | ** Pgno gets written with every write anyway so the base file is useless. 16 | ** It is cleaner to create this useless VFS than to mess with pager.c 17 | ** Sqlite will still read the base file when opened. So we return a default wal enabled empty 18 | ** page. 19 | */ 20 | 21 | #if !defined(SQLITE_TEST) || SQLITE_OS_UNIX 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #ifndef _WIN32 28 | #include 29 | #include 30 | #include 31 | #endif 32 | #include 33 | #include 34 | #include 35 | 36 | /* 37 | ** The maximum pathname length supported by this VFS. 38 | */ 39 | #define MAXPATHNAME 512 40 | 41 | /* 42 | ** When using this VFS, the sqlite3_file* handles that SQLite uses are 43 | ** actually pointers to instances of type NullFile. 44 | */ 45 | typedef struct NullFile NullFile; 46 | struct NullFile { 47 | sqlite3_file base; /* Base class. Must be first. 
*/ 48 | // i64 size; 49 | // u8 buf[SQLITE_DEFAULT_PAGE_SIZE]; 50 | int lock; 51 | }; 52 | sqlite3_vfs *sqlite3_nullvfs(void); 53 | 54 | static int nullDirectWrite( 55 | NullFile *p, /* File handle */ 56 | const void *zBuf, /* Buffer containing data to write */ 57 | int iAmt, /* Size of data to write in bytes */ 58 | sqlite_int64 iOfst /* File offset to write to */ 59 | ){ 60 | // DBG((g_log,"drwite offset=%lld, size=%d\n",iOfst,iAmt)); 61 | 62 | // if ((iOfst + iAmt) > p->size) 63 | // p->size += ((iOfst+iAmt)-p->size); 64 | // if ((iOfst + iAmt) <= sizeof(p->buf)) 65 | // memcpy(p->buf+iOfst,zBuf,iAmt); 66 | 67 | return SQLITE_OK; 68 | } 69 | 70 | 71 | static int nullClose(sqlite3_file *pFile){ 72 | // printf("close\n"); 73 | return SQLITE_OK; 74 | } 75 | 76 | /* 77 | ** Read data from a file. 78 | */ 79 | static int nullRead( 80 | sqlite3_file *pFile, 81 | void *zBuf, 82 | int iAmt, 83 | sqlite_int64 iOfst 84 | ){ 85 | // NullFile *p = (NullFile*)pFile; 86 | 87 | // DBG((g_log,"read bytes=%d offset=%lld\n",iAmt,iOfst)); 88 | 89 | if ((iOfst + iAmt) > SQLITE_DEFAULT_PAGE_SIZE) 90 | return SQLITE_IOERR_SHORT_READ; 91 | 92 | if (iOfst == 0 && iAmt >= 100) 93 | { 94 | u8 *page = zBuf; 95 | memset(page,0,iAmt); 96 | strcpy((char*)page,"SQLite format 3"); 97 | page[16] = ((SQLITE_DEFAULT_PAGE_SIZE >> 8) & 255); 98 | page[17] = SQLITE_DEFAULT_PAGE_SIZE & 255; 99 | page[18] = 2; 100 | page[19] = 2; 101 | page[21] = 64; 102 | page[22] = 32; 103 | page[23] = 32; 104 | page[27] = 1; 105 | page[31] = 1; 106 | page[95] = 1; 107 | page[97] = 45; 108 | page[98] = 230; 109 | page[99] = 10; 110 | 111 | if (iAmt > 100) 112 | { 113 | page[100] = 13; 114 | page[105] = 16; 115 | } 116 | } 117 | else 118 | { 119 | u8 page[SQLITE_DEFAULT_PAGE_SIZE]; 120 | // Code to print values of first page (its mostly empty). 121 | // This way we can simply hardcode the only part of file that gets called to vfs for. 
122 | // lists:foldl(fun(X,C) -> case X > 0 of true -> io:format("~p: ~p, ~n",[C,X]); false -> ok end,C+1 end,0,binary_to_list(B)). 123 | memset(page,0,sizeof(page)); 124 | strcpy((char*)page,"SQLite format 3"); 125 | page[16] = ((SQLITE_DEFAULT_PAGE_SIZE >> 8) & 255); 126 | page[17] = SQLITE_DEFAULT_PAGE_SIZE & 255; 127 | page[18] = 2; 128 | page[19] = 2; 129 | page[21] = 64; 130 | page[22] = 32; 131 | page[23] = 32; 132 | page[27] = 1; 133 | page[31] = 1; 134 | page[95] = 1; 135 | page[97] = 45; 136 | page[98] = 230; 137 | page[99] = 10; 138 | page[100] = 13; 139 | page[105] = 16; 140 | 141 | memcpy(zBuf,page+iOfst,iAmt); 142 | } 143 | 144 | return SQLITE_OK; 145 | } 146 | 147 | /* 148 | ** Write data to a crash-file. 149 | */ 150 | static int nullWrite( 151 | sqlite3_file *pFile, 152 | const void *zBuf, 153 | int iAmt, 154 | sqlite_int64 iOfst 155 | ){ 156 | NullFile *p = (NullFile*)pFile; 157 | return nullDirectWrite(p,zBuf,iAmt,iOfst); 158 | } 159 | 160 | /* 161 | ** Truncate a file. This is a no-op for this VFS (see header comments at 162 | ** the top of the file). 163 | */ 164 | static int nullTruncate(sqlite3_file *pFile, sqlite_int64 size){ 165 | // printf("truncate\n"); 166 | return SQLITE_OK; 167 | } 168 | 169 | /* 170 | ** Sync the contents of the file to the persistent media. 171 | */ 172 | static int nullSync(sqlite3_file *pFile, int flags){ 173 | // printf("nullsync\n"); 174 | return SQLITE_OK; 175 | } 176 | 177 | /* 178 | ** Write the size of the file in bytes to *pSize. 
179 | */ 180 | static int nullFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ 181 | // NullFile *p = (NullFile*)pFile; 182 | // *pSize = p->size; 183 | 184 | *pSize = SQLITE_DEFAULT_PAGE_SIZE; 185 | 186 | return SQLITE_OK; 187 | } 188 | 189 | static int nullLock(sqlite3_file *pFile, int eLock){ 190 | // printf("lock\n"); 191 | NullFile *p = (NullFile*)pFile; 192 | if (p->lock < eLock) 193 | p->lock = eLock; 194 | return SQLITE_OK; 195 | } 196 | static int nullUnlock(sqlite3_file *pFile, int eLock){ 197 | // printf("unlock\n"); 198 | NullFile *p = (NullFile*)pFile; 199 | if (p->lock > eLock) 200 | p->lock = eLock; 201 | return SQLITE_OK; 202 | } 203 | static int nullCheckReservedLock(sqlite3_file *pFile, int *pResOut){ 204 | *pResOut = 0; 205 | // printf("lock\n"); 206 | return SQLITE_OK; 207 | } 208 | 209 | static int nullFileControl(sqlite3_file *pFile, int op, void *pArg){ 210 | // if (op == SQLITE_FCNTL_SIZE_HINT) 211 | // { 212 | // printf("SIZEHINT %lld\n",*(i64*)pArg); 213 | // } 214 | NullFile *p = (NullFile*)pFile; 215 | switch( op ){ 216 | case SQLITE_FCNTL_WAL_BLOCK: { 217 | return SQLITE_OK; 218 | } 219 | case SQLITE_FCNTL_LOCKSTATE: { 220 | *(int*)pArg = p->lock; 221 | return SQLITE_OK; 222 | } 223 | case SQLITE_FCNTL_LAST_ERRNO: { 224 | *(int*)pArg = 0; 225 | return SQLITE_OK; 226 | } 227 | case SQLITE_FCNTL_CHUNK_SIZE: { 228 | return SQLITE_OK; 229 | } 230 | case SQLITE_FCNTL_SIZE_HINT: { 231 | return SQLITE_OK; 232 | } 233 | case SQLITE_FCNTL_PERSIST_WAL: { 234 | return SQLITE_OK; 235 | } 236 | case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 237 | return SQLITE_OK; 238 | } 239 | case SQLITE_FCNTL_VFSNAME: { 240 | *(char**)pArg = "123"; 241 | return SQLITE_OK; 242 | } 243 | case SQLITE_FCNTL_TEMPFILENAME: { 244 | *(char**)pArg = "123"; 245 | return SQLITE_OK; 246 | } 247 | case SQLITE_FCNTL_HAS_MOVED: { 248 | *(int*)pArg = 0; 249 | return SQLITE_OK; 250 | } 251 | #if SQLITE_MAX_MMAP_SIZE>0 252 | case SQLITE_FCNTL_MMAP_SIZE: { 253 | return SQLITE_OK; 254 
| } 255 | #endif 256 | #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 257 | case SQLITE_FCNTL_SET_LOCKPROXYFILE: 258 | case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 259 | return SQLITE_OK; 260 | } 261 | #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 262 | } 263 | return SQLITE_NOTFOUND; 264 | } 265 | 266 | static int nullSectorSize(sqlite3_file *pFile){ 267 | // printf("nullSectorSize\n"); 268 | return SQLITE_DEFAULT_SECTOR_SIZE; 269 | } 270 | static int nullDeviceCharacteristics(sqlite3_file *pFile){ 271 | // printf("devchar\n"); 272 | return 0; 273 | } 274 | 275 | static int nullShmMap( 276 | sqlite3_file *fd, /* Handle open on database file */ 277 | int iRegion, /* Region to retrieve */ 278 | int szRegion, /* Size of regions */ 279 | int bExtend, /* True to extend file if necessary */ 280 | void volatile **pp /* OUT: Mapped memory */ 281 | ){return SQLITE_OK; } 282 | 283 | static int nullShmLock( 284 | sqlite3_file *fd, /* Database file holding the shared memory */ 285 | int ofst, /* First lock to acquire or release */ 286 | int n, /* Number of locks to acquire or release */ 287 | int flags /* What to do with the lock */ 288 | ){return SQLITE_OK; } 289 | 290 | static void nullShmBarrier( 291 | sqlite3_file *fd /* Database file holding the shared memory */ 292 | ){} 293 | 294 | static int nullShmUnmap( 295 | sqlite3_file *fd, /* The underlying database file */ 296 | int deleteFlag /* Delete shared-memory if true */ 297 | ){return SQLITE_OK;} 298 | 299 | // static void nullUnmapfile(unixFile *pFd){} 300 | 301 | static int nullFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){return SQLITE_OK; } 302 | 303 | static int nullUnfetch(sqlite3_file *fd, i64 iOff, void *p){return SQLITE_OK; } 304 | 305 | 306 | static int nullOpen( 307 | sqlite3_vfs *pVfs, /* VFS */ 308 | const char *zName, /* File to open, or 0 for a temp file */ 309 | sqlite3_file *pFile, /* Pointer to NullFile struct to populate */ 310 | int flags, /* Input SQLITE_OPEN_XXX flags */ 
311 | int *pOutFlags /* Output SQLITE_OPEN_XXX flags (or NULL) */ 312 | ){ 313 | int openflags = 0; 314 | static const sqlite3_io_methods nullio = { 315 | 3, /* iVersion */ 316 | nullClose, /* xClose */ 317 | nullRead, /* xRead */ 318 | nullWrite, /* xWrite */ 319 | nullTruncate, /* xTruncate */ 320 | nullSync, /* xSync */ 321 | nullFileSize, /* xFileSize */ 322 | nullLock, /* xLock */ 323 | nullUnlock, /* xUnlock */ 324 | nullCheckReservedLock, /* xCheckReservedLock */ 325 | nullFileControl, /* xFileControl */ 326 | nullSectorSize, /* xSectorSize */ 327 | nullDeviceCharacteristics, /* xDeviceCharacteristics */ 328 | nullShmMap, /* xShmMap */ 329 | nullShmLock, /* xShmLock */ 330 | nullShmBarrier, /* xShmBarrier */ 331 | nullShmUnmap, /* xShmUnmap */ 332 | nullFetch, /* xFetch */ 333 | nullUnfetch /* xUnfetch */ 334 | }; 335 | int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 336 | int isCreate = (flags & SQLITE_OPEN_CREATE); 337 | int isReadonly = (flags & SQLITE_OPEN_READONLY); 338 | int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 339 | 340 | NullFile *p = (NullFile*)pFile; /* Populate this structure */ 341 | 342 | memset(p, 0, sizeof(NullFile)); 343 | 344 | if( isReadonly ) openflags |= O_RDONLY; 345 | if( isReadWrite ) openflags |= O_RDWR; 346 | if( isCreate ) openflags |= O_CREAT; 347 | #ifdef O_NOFOLLOW 348 | if( isExclusive ) openflags |= (O_EXCL|O_NOFOLLOW); 349 | #else 350 | if( isExclusive ) openflags |= (O_EXCL); 351 | #endif 352 | 353 | #ifdef O_LARGEFILE 354 | openflags |= (O_LARGEFILE|O_BINARY); 355 | #else 356 | openflags |= (O_BINARY); 357 | #endif 358 | 359 | if( pOutFlags ){ 360 | #ifdef O_LARGEFILE 361 | *pOutFlags = (openflags | O_LARGEFILE | O_BINARY); 362 | #else 363 | *pOutFlags = (openflags | O_BINARY); 364 | #endif 365 | } 366 | p->base.pMethods = &nullio; 367 | // printf("OPENOK\n"); 368 | return SQLITE_OK; 369 | } 370 | 371 | /* 372 | ** Delete the file identified by argument zPath. 
If the dirSync parameter
** is non-zero, then ensure the file-system modification to delete the
** file has been synced to disk before returning.
**
** In this VFS there is nothing to delete: both arguments are ignored and
** SQLITE_OK is returned.
*/
static int nullDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
	// printf("delete\n");
	return SQLITE_OK;
}

#ifndef F_OK
# define F_OK 0
#endif
#ifndef R_OK
# define R_OK 4
#endif
#ifndef W_OK
# define W_OK 2
#endif

/*
** Query the file-system to see if the named file exists, is readable or
** is both readable and writable.
**
** This VFS never consults a file system: *pResOut is always set to 0,
** whatever zPath and flags are, and SQLITE_OK is returned.
*/
static int nullAccess(
	sqlite3_vfs *pVfs,
	const char *zPath,
	int flags,
	int *pResOut
){
	*pResOut = 0;
	return SQLITE_OK;
}

/*
** Write a "full" pathname for zPath into zPathOut (at most nPathOut bytes,
** always NUL-terminated).
**
** No working directory is looked up: zDir is left as the empty string, so
** the output is always "/" followed by zPath, even when zPath already
** starts with '/'.
*/
static int nullFullPathname(
	sqlite3_vfs *pVfs,              /* VFS */
	const char *zPath,              /* Input path (possibly a relative path) */
	int nPathOut,                   /* Size of output buffer in bytes */
	char *zPathOut                  /* Pointer to output buffer */
){
	char zDir[MAXPATHNAME+1];
	zDir[0] = '\0';
	zDir[MAXPATHNAME] = '\0';
	// printf("path\n");

	sqlite3_snprintf(nPathOut, zPathOut, "%s/%s", zDir, zPath);
	zPathOut[nPathOut-1] = '\0';

	return SQLITE_OK;
}


/*
** This function returns a pointer to the VFS implemented in this file.
** To make the VFS available to SQLite:
**
**   sqlite3_vfs_register(sqlite3_nullvfs(), 0);
**
** The returned object is a function-local static, so every call returns the
** same pointer. File-level methods (open/delete/access/fullpathname) are the
** null* functions above; the remaining slots use the unix* or win* functions.
** NOTE(review): those names are not declared in this file - presumably it is
** compiled in the same translation unit as SQLite's OS layer; confirm.
*/
sqlite3_vfs *sqlite3_nullvfs(void){
	static sqlite3_vfs nullvfs = {
		3,                            /* iVersion */
		sizeof(NullFile),             /* szOsFile */
		MAXPATHNAME,                  /* mxPathname */
		0,                            /* pNext */
		"null",                       /* zName */
		0,                            /* pAppData */
		nullOpen,                     /* xOpen */
		nullDelete,                   /* xDelete */
		nullAccess,                   /* xAccess */
		nullFullPathname,             /* xFullPathname */
#ifndef _WIN32
		unixDlOpen,                   /* xDlOpen */
		unixDlError,                  /* xDlError */
		unixDlSym,                    /* xDlSym */
		unixDlClose,                  /* xDlClose */
		unixRandomness,               /* xRandomness */
		unixSleep,                    /* xSleep */
		unixCurrentTime,              /* xCurrentTime */
		unixGetLastError,             /* xGetLastError */
		unixCurrentTimeInt64,         /* xCurrentTimeInt64 */
		unixSetSystemCall,            /* xSetSystemCall */
		unixGetSystemCall,            /* xGetSystemCall */
		unixNextSystemCall,           /* xNextSystemCall */
#else
		winDlOpen,
		winDlError,
		winDlSym,
		winDlClose,
		winRandomness,
		winSleep,
		winCurrentTime,               /* xCurrentTime */
		winGetLastError,              /* xGetLastError */
		winCurrentTimeInt64,          /* xCurrentTimeInt64 */
		winSetSystemCall,             /* xSetSystemCall */
		winGetSystemCall,             /* xGetSystemCall */
		winNextSystemCall,            /* xNextSystemCall */
#endif
	};
	return &nullvfs;
}

#endif /* !defined(SQLITE_TEST) || SQLITE_OS_UNIX */

--------------------------------------------------------------------------------
/test/test.erl:
--------------------------------------------------------------------------------
-module(test).
-include_lib("eunit/include/eunit.hrl").
-define(READTHREADS,4).
-define(WRITETHREADS,2).
-define(DBSIZE,4096*1024*128*5).
% -define(INIT,actordb_driver:init({{"."},{"INSERT INTO tab VALUES (?1,?2);"},?DBSIZE,?READTHREADS,?WRITETHREADS})).
% Driver configuration map passed to actordb_driver:init/1 by every test.
% pluginfiles collects any shared libraries found in ../test (evaluated at each use of the macro).
-define(CFG,#{paths => {"."},
	pluginfiles => list_to_tuple(filelib:wildcard("../test/*.dylib")++
		filelib:wildcard("../test/*.so")++filelib:wildcard("../test/*.dll")),
	staticsqls => {"INSERT INTO tab VALUES (?1,?2);"},
	dbsize => ?DBSIZE,
	rthreads => ?READTHREADS,
	wthreads => ?WRITETHREADS,
	counters => 10,
	timecounter => 0,
	lmdbsync => 0,
	maxtime => 90,
	nbatch => 30}).
-define(INIT,actordb_driver:init(?CFG)).
% Shorthand for the read call; used as ?READ(Sql,Db).
-define(READ,actordb_driver:exec_read).

% EUnit generator. Removes files left behind by earlier runs from the current
% directory, then returns the tests in the order they are meant to run.
run_test_() ->
	[file:delete(Fn) || Fn <- filelib:wildcard("wal.*")],
	[file:delete(Fn) || Fn <- filelib:wildcard("*.txt")++filelib:wildcard("*.db")++["lmdb","lmdb-lock"]],
	[
		fun lz4/0,
		fun modes/0,
		fun dbcopy/0,
		fun checkpoint/0,
		{timeout, 25, fun checkpoint1/0},
		fun bigtrans/0,
		fun bigtrans_check/0,
		{timeout,60*10,fun async/0},
		fun problem_checkpoint/0,
		fun problem_rewind/0
		% {timeout,25,fun open_test/0}
	].

% LZ4 round trip: compress a small binary, decompress it and require the original back
% (the second 'Bin1 =' is a match against the already bound value).
lz4() ->
	?debugFmt("lz4",[]),
	?INIT,
	Bin1 = binary:copy(<<"SELECT * FROM WTF;">>,2),
	{Compressed1,CompressedSize1} = actordb_driver:lz4_compress(Bin1),
	% ?debugFmt("Compressed ~p size ~p ",[byte_size(Compressed),CompressedSize]),
	Bin1 = actordb_driver:lz4_decompress(Compressed1,byte_size(Bin1),CompressedSize1),
	ok.

% Exercises the call shapes of the driver API on two connections:
% an SQL connection opened with an initial statement, and a blob-mode connection.
modes() ->
	?debugFmt("modes",[]),
	?INIT,
	Sql = <<"select name, sql from sqlite_master where type='table';",
		"$PRAGMA cache_size=10;">>,
	% open/3 with SQL as third argument goes through open/4 and must return a 3-tuple.
	{ok,Db,_} = actordb_driver:open(":memory:",1,Sql),
	{ok,_} = actordb_driver:exec_script(<<"$CREATE TABLE tab (id INTEGER PRIMARY KEY, txt TEXT);",
		"$CREATE TABLE tab1 (id INTEGER PRIMARY KEY, txt TEXT);",
		"$CREATE TABLE tab2 (id INTEGER PRIMARY KEY, val DOUBLE);"
		"$ALTER TABLE tab ADD i INTEGER;$CREATE TABLE tabx (id INTEGER PRIMARY KEY, txt TEXT);">>,Db),
	{ok,_} = actordb_driver:exec_script("INSERT INTO tab VALUES (1, 'asdadad',1);",Db),
	{ok,_} = actordb_driver:exec_script("INSERT INTO tab2 VALUES (1, 32.0);",Db),
	% if libhalf extension is present
	% (the result is only printed, never matched, so this line cannot fail the test)
	?debugFmt("half=~p",[?READ("SELECT * from tab2 WHERE half(val) == 16.0;",Db)]),
	{ok,[_]} = ?READ("SELECT * from tab;",Db),
	Sql1 = "INSERT INTO tab VALUES (2, 'asdadad',1);",
	Sql2 = "INSERT INTO tab VALUES (3, 'tritri',1);",
	Sql3 = "SELECT * FROM tab;",
	% A tuple of statements must yield a tuple with one result per statement.
	{ok,{_,_,_}} = R = actordb_driver:exec_script({Sql1,Sql2,Sql3},Db),
	% ?debugFmt("Tuple exec ~p", [R]).

	% Blob mode: write values under keys 1 and 2, then read keys 1, 2 and 3.
	% Key 3 is never written and must come back as [].
	{ok,Blob} = actordb_driver:open("myfile",0,blob),
	[begin
		% ?debugFmt("Blob ~p",[N]),
		Bin1 = iolist_to_binary([<<"page_1_">>,integer_to_list(N)]),
		Bin2 = iolist_to_binary([<<"page_2_">>,integer_to_list(N)]),
		{ok,{[],[]}} = actordb_driver:exec_script({1,2},{Bin1,Bin2},Blob),
		case actordb_driver:exec_script({1,2,3},Blob) of
			{ok,{[Bin1],[Bin2],[]}} ->
				ok;
			_BM ->
				?debugFmt("Match failed on ~p: ~p",[N,_BM]),
				throw(badmatch)
		end
	end || N <- lists:seq(1,1000)],
	?debugFmt("STATEM",[]),
	% Expected stmt_info answers for a statement with two parameters and for a
	% single-parameter select over the three-column table.
	{ok,2,0} = actordb_driver:stmt_info(Db,"insert into tab values (?1,?2,3);"),
	{ok,1,3} = actordb_driver:stmt_info(Db,"select * from tab where id=?1;"),

	ok.
88 | 89 | % open_test() -> 90 | % ?debugFmt("Opening and closing lots of connections for 20s",[]), 91 | % ?INIT, 92 | % Val = binary:copy(<<"a">>,1024*1024), 93 | % {Pid,_} = spawn_monitor(fun() -> loop_open(Val) end), 94 | % receive 95 | % {'DOWN',_Monitor,_,_PID,Reason} -> 96 | % exit(Reason) 97 | % after 20000 -> 98 | % Pid ! stop, 99 | % receive 100 | % {'DOWN',_Monitor,_,_PID,Reason} -> 101 | % ?debugFmt("Opened: ~p times",[Reason]) 102 | % after 2000 -> 103 | % exit(Pid,stop) 104 | % end 105 | % end, 106 | % code:delete(actordb_driver_nif), 107 | % code:purge(actordb_driver_nif), 108 | % ok. 109 | 110 | % loop_open(Val) -> 111 | % lo(undefined,Val,0). 112 | % lo(undefined,Val,N) -> 113 | % {Pid,_} = spawn_monitor(fun() -> 114 | % % ?debugFmt("Open start ~p",[N]), 115 | % % Always open on same name. This is to avoid write transactions to actors db. 116 | % {ok,Db} = actordb_driver:open("ac",N), 117 | % % Sql = <<"CREATE TABLE tab (id integer primary key, val text);insert into tab values (1,?1);">>, 118 | % % {ok,_} = actordb_driver:exec_script(Sql,[[[Val]]],Db,infinity,1,1,<<>>), 119 | % % ?debugFmt("Open complete ~p",[N]), 120 | % exit(normal) 121 | % end), 122 | % lo(Pid,Val,N); 123 | % lo(Pid,Val,N) -> 124 | % receive 125 | % {'DOWN',_Monitor,_,Pid,normal} -> 126 | % % ?debugFmt("Opened exit ~p",[N]), 127 | % lo(undefined,Val,N+1); 128 | % {'DOWN',_,_,_,_} -> 129 | % exit(error); 130 | % stop -> 131 | % exit(N) 132 | % end. 

% Stress test. Spawns 200 monitored workers (w/2), each with its own actor, that
% loop over writes, reads and checkpoints. If any worker goes down during the
% first 20 seconds the test exits with that worker's reason. Otherwise all
% workers are told to stop, their {Reads,Writes} exit reasons are summed and
% printed, and the NIF module is unloaded.
async() ->
	?debugFmt("Running many async reads/writes for 20s",[]),
	?INIT,
	application:ensure_all_started(crypto),
	% NOTE(review): nothing in the visible test code reads the ops table again.
	ets:new(ops,[set,public,named_table,{write_concurrency,true}]),
	ets:insert(ops,{w,0}),
	ets:insert(ops,{r,0}),
	% 1000 random base64 strings, shared by all workers as insert payloads.
	RandBytes = [base64:encode(crypto:strong_rand_bytes(128)) || _ <- lists:seq(1,1000)],
	Pids = [element(1,spawn_monitor(fun() -> w(P,RandBytes) end)) || P <- lists:seq(1,200)],
	% Syncer = spawn(fun() -> syncer() end),
	receive
		% Workers only exit on their own when something failed.
		{'DOWN',_Monitor,_,_PID,Reason} ->
			exit(Reason)
	after 20000 ->
		ok
	end,
	% Syncer ! stop,
	[P ! stop || P <- Pids],
	{Reads,Writes} = rec_counts(0,0),
	?debugFmt("Reads: ~p, Writes: ~p",[Reads,Writes]),
	garbage_collect(),
	code:delete(actordb_driver_nif),
	code:purge(actordb_driver_nif).
% Sums the {Reads,Writes} exit reasons of stopped workers. Returns the totals
% once no further 'DOWN' message of that shape arrives for two seconds.
rec_counts(R,W) ->
	receive
		{'DOWN',_Monitor,_,_PID,{R1,W1}} ->
			rec_counts(R+R1, W+W1)
	after 2000 ->
		{R,W}
	end.

% Calls actordb_driver:fsync/0 every 100 ms until it receives stop.
% Only referenced from commented-out lines in async/0.
syncer() ->
	receive
		stop ->
			ok
	after 100 ->
		ok = actordb_driver:fsync(),
		syncer()
	end.

% Worker entry point: opens actor "ac<N>" (N is also passed as the second argument
% to open/2), creates the table and enters the read/write loop w/7 with zeroed
% counters and the event counter starting at 1.
w(N,RandList) ->
	{ok,Db} = actordb_driver:open("ac"++integer_to_list(N),N),
	% {ok,Db} = actordb_driver:open(":memory:",N),
	Sql = "CREATE TABLE tab (id integer primary key, val text);",
	{ok,_} = actordb_driver:exec_script(Sql,Db,infinity,1,1,<<>>),
	w(Db,N,0,0,1,RandList,[]).
% Worker loop.
%   Db   - connection opened in w/2
%   Me   - worker number (only used by commented-out debugging below)
%   R, W - number of reads / writes done so far; returned as exit reason {R,W} on stop
%   C    - step counter; also passed to the driver with each write (presumably as the
%          event number - compare the exec_script calls in dbcopy/0)
%   [Rand|T], L - payloads still to use and payloads already used; when the first list
%          runs out the second clause starts over with the used ones
% Each step first polls the mailbox for stop without blocking, then:
%   C divisible by 20 -> checkpoint up to C-20
%   other even C      -> one insert through the static statement
%   odd C             -> one read
w(Db,Me,R,W,C,[Rand|T],L) ->
	% {_,QL} = erlang:process_info(self(),message_queue_len),
	% case QL of
	% 	0 ->
	% 		ok;
	% 	_ ->
	% 		exit({R,W})
	% end,
	receive
		stop ->
			exit({R,W})
	after 0 ->
		case C rem 2 of
			0 when C rem 20 == 0 ->
				% case Me == 1 andalso C rem 200 == 0 of
				% 	true ->
				% 		?debugFmt("me=1, c=~p",[C]);
				% 	_ ->
				% 		ok
				% end,
				actordb_driver:checkpoint(Db,C-20),
				w(Db,Me,R,W,C+1,T,[Rand|L]);
			% _ when C rem 101 == 0, Me == 1 ->
			% 	?debugFmt("Contention situations:~p",[actordb_driver:noop(Db)]);
			0 ->
				% Using static sql with parameterized queries cuts down on sql parsing
				% Sql = <<"INSERT INTO tab VALUES (?1,?2);">>,
				% "#s00;" stands for the statement commented out above, which is the
				% first (and only) entry of staticsqls in ?CFG.
				Sql = <<"#s00;">>,
				{ok,_} = actordb_driver:exec_script(Sql,[[[C,Rand]]],Db,infinity,1,C,<<>>),
				w(Db,Me,R,W+1,C+1,T,[Rand|L]);
			_ ->
				{ok,_RR} = ?READ("select * from tab limit 1",Db),
				w(Db,Me,R+1,W,C+1,T,[Rand|L])
		end
	end;
w(Db,Me,R,W,C,[],L) ->
	w(Db,Me,R,W,C,L,[]).

% Regression test that only runs when the data file ../problemlmdb exists.
% It unloads the NIF, moves the current lmdb file aside as lmdb_prev, copies
% the problem file in as lmdb, re-initialises the driver, opens a fixed actor
% from that file and requires a checkpoint to 5700 to succeed.
% Without the file the test prints a notice and passes.
problem_checkpoint() ->
	case file:read_file_info("../problemlmdb") of
		{ok,_} ->
			garbage_collect(),
			code:delete(actordb_driver_nif),
			code:purge(actordb_driver_nif),
			false = code:is_loaded(actordb_driver_nif),
			case file:read_file_info("lmdb") of
				{ok,_} ->
					ok = file:rename("lmdb","lmdb_prev"),
					ok = file:delete("lmdb-lock");
				_ ->
					ok
			end,
			{ok,_} = file:copy("../problemlmdb","lmdb"),
			?INIT,
			{ok,Db} = actordb_driver:open("actors/irenatest2%40onyx.biocoded.com.user"),
			% ok = actordb_driver:wal_rewind(Db,5400),
			ok = actordb_driver:checkpoint(Db,5700),
			?debugFmt("Problem checkpoint success",[]),
			ok;
		_ ->
			?debugFmt("Skipping problem checkpoint test, you do not have the file",[]),
			ok
	end.
% Counterpart of problem_checkpoint/0 for wal_rewind: only runs when ../problemlmdb
% exists. Unloads the NIF, deletes the current lmdb files (instead of keeping a
% backup), copies the problem file in, re-initialises the driver and requires a
% rewind of the fixed actor to 5400 to succeed.
problem_rewind() ->
	case file:read_file_info("../problemlmdb") of
		{ok,_} ->
			garbage_collect(),
			code:delete(actordb_driver_nif),
			code:purge(actordb_driver_nif),
			false = code:is_loaded(actordb_driver_nif),
			case file:read_file_info("lmdb") of
				{ok,_} ->
					ok = file:delete("lmdb"),
					ok = file:delete("lmdb-lock");
				_ ->
					ok
			end,
			{ok,_} = file:copy("../problemlmdb","lmdb"),
			?INIT,
			{ok,Db} = actordb_driver:open("actors/irenatest2%40onyx.biocoded.com.user"),
			ok = actordb_driver:wal_rewind(Db,5400),
			?debugFmt("Problem rewind success",[]),
			ok;
		_ ->
			?debugFmt("Skipping problem rewind test, you do not have the file",[]),
			ok
	end.

% End-to-end test of replicating an actor page by page.
%  1. Creates actor "original", stores term info and writes rows with increasing
%     event numbers (the fifth exec_script argument).
%  2. Checks that a savepoint followed by ROLLBACK leaves the data unchanged.
%  3. Streams all pages with iterate_db into a second actor "copy" (inject_page) and,
%     decompressed, into a plain file "sq" that the sqlite3 command line tool reads.
%  4. Checks actor_info and finally rewinds "original" with wal_rewind.
% checkpoint/0 later reopens "original" and depends on the state left here.
dbcopy() ->
	?INIT,
	?debugFmt("Dbcopy",[]),
	{ok,Db} = actordb_driver:open("original"),
	{ok,_} = actordb_driver:exec_script("CREATE TABLE tab (id INTEGER PRIMARY KEY, txt TEXT, val INTEGER);",Db,infinity,1,1,<<>>),
	ok = actordb_driver:term_store(Db,10,<<"abcdef">>),
	% Sleep because term store does not wait for response and actor_info uses read thread
	% term_store uses write thread.
	timer:sleep(500),
	% The last two elements are the values given to term_store above.
	{{1,1},{1,1},{0,0},2,2,10,<<"abcdef">>} = actordb_driver:actor_info("original",0),
	% Same store, addressed by actor name instead of by connection.
	ok = actordb_driver:term_store("original",10,<<"abcdef1">>,0),
	EN = 100,
	% Row N+100 is written with event number N, for N = 2..100.
	[ {ok,_} = actordb_driver:exec_script(["INSERT INTO tab VALUES (",integer_to_list(N+100),",'aaa',2)"],Db,infinity,1,N,<<>>) || N <- lists:seq(2,EN)],
	case ?CFG of
		#{lmdbsync := 1} ->
			ok;
		_ ->
			% Only checked when the driver is not configured with lmdbsync = 1.
			0 = actordb_driver:fsync_num(Db),
			ok = actordb_driver:fsync(Db),
			0 = actordb_driver:fsync_num(Db)
	end,
	{ok,_} = actordb_driver:exec_script("INSERT INTO tab VALUES (2,'bbb',3)",Db,infinity,1,EN+1,<<>>),
	{ok,_} = actordb_driver:exec_script("INSERT INTO tab VALUES (3,'ccc',4)",Db,infinity,1,EN+2,<<>>),
	ok = actordb_driver:replication_done(Db),
	{ok,Select} = ?READ("select * from tab;",Db),

	% An update inside a savepoint that is then rolled back must not be visible:
	% the second read has to match the already bound Select.
	{ok,_} = actordb_driver:exec_script("SAVEPOINT 'adb'; UPDATE tab SET txt='ccc123' where id=3;",Db,infinity,1,EN+3,<<>>),
	{ok,_} = actordb_driver:exec_script("ROLLBACK;",Db),
	{ok,Select} = ?READ("select * from tab;",Db),
	% ?debugFmt("Select ~p",[Select]),
	{ok,Copy} = actordb_driver:open("copy"),
	% First chunk of pages; copy/4 fetches the rest unless Done is already > 0.
	{ok,Iter,Bin,Head,Done} = actordb_driver:iterate_db(Db,0,0),
	ok = actordb_driver:inject_page(Copy,Bin,Head),
	% This will export into an sqlite file named sq.
	{ok,F} = file:open("sq",[write,binary,raw]),
	?debugFmt("Exporting actor into an sqlite file ~p",[Done]),
	% readpages(Head,Bin,F),
	file:write(F,actordb_driver:lz4_decompress(Bin,4096)),
	case Done > 0 of
		true ->
			ok;
		_ ->
			copy(Db,Iter,F,Copy)
	end,
	% ?debugFmt("pages=~pB, evterm=~p, evnum=~p",[byte_size(Bin), Evterm, Evnum1]),
	file:close(F),
	% Needs the sqlite3 command line tool on the PATH; its output is only printed.
	?debugFmt("Reading from exported sqlite file: ~p",[os:cmd("sqlite3 sq \"select * from tab\"")]),
	% The copy must return exactly what the original returned.
	{ok,Select} = ?READ("select * from tab;",Copy),
	?debugFmt("Reading from copy!: ~p",[Select]),
	file:delete("sq"),

	% {ok,Copy2} = actordb_driver:open("copy2"),
	% {ok,_Iter2,Bin2,Head2,Done2} = actordb_driver:iterate_db(Db,1,1), % get pgno1 and pgno2 (create table)
	% <<A:64,B:64,PGNO:32,Commit:32>> = Head2, % NOTE(review): pattern was lost in this copy; field widths are a reconstruction - confirm
	% ?debugFmt("Second inject ~p ~p ~p ~p",[A,B,PGNO,Commit]),
	% readpages(Bin2,undefined),
	% Inject pgno1
	% case Done2 > 0 of
	% 	true ->
	% 		ok;
	% 	_ ->
	% 		copy(Db,_Iter2,undefined,Copy2)
	% end,
	% {ok,_Iter3,Bin3,Head3,_Done3} = actordb_driver:iterate_db(Db,1,1), % get pgno2 with first insert
	% ok = actordb_driver:inject_page(Copy2,Bin3,Head3),
	% FirstInject = {ok,[[{columns,{<<"id">>,<<"txt">>,<<"val">>}},{rows,[{102,<<"aaa">>,2}]}]]},
	% FirstInject = actordb_driver:exec_script("select * from tab;",Copy2),
	% ?debugFmt("Reading from second copy success! - only first insert:~n ~p",[FirstInject]),
	% 102 is the event number of the last committed write (EN+2); the trailing
	% 10 and <<"abcdef1">> come from the second term_store call.
	{{1,1},{1,102},{0,0},2,103,10,<<"abcdef1">>} = Info = actordb_driver:actor_info("original",0),
	?debugFmt("Get actor info ~p",[Info]),
	?debugFmt("Rewind original to last insert!",[]),
	{ok,1} = actordb_driver:iterate_db(Db,2,10),
	% ok = actordb_driver:checkpoint(Db,60).
	ok = actordb_driver:wal_rewind(Db,100),
	% After the rewind the newest row is 199 (written with event number 99) and the
	% oldest is 102, i.e. everything written with event number 100 or above is gone.
	{ok,[[{columns,{<<"id">>,<<"txt">>,<<"val">>}},
		{rows,[{199,<<"aaa">>,2},{198,<<"aaa">>,2}|_] = Rows}]]} = ?READ("select * from tab;",Db),
	[{102,<<"aaa">>,2}|_] = lists:reverse(Rows).
	% ?debugFmt("After rewind to evnum=2: ~p",[FirstInject]).

% Continues with the actor left behind by dbcopy/0 (no ?INIT here).
% A checkpoint to 60 must not change what a select returns. Afterwards the actor
% is rewound to 0 with a replacement script, and only the replacement table may remain.
checkpoint() ->
	garbage_collect(),
	?debugFmt("Checkpoint!",[]),
	{ok,Db} = actordb_driver:open("original"),
	{ok,S} = actordb_driver:exec_script("select * from tab;",Db),
	ok = actordb_driver:checkpoint(Db,60),
	% S is bound, so this asserts identical content before and after the checkpoint.
	{ok,S} = ?READ("select * from tab;",Db),
	[[{columns,{<<"id">>,<<"txt">>,<<"val">>}},
	{rows,[{199,<<"aaa">>,2},{198,<<"aaa">>,2}|_]}]] = S,
	% ?debugFmt("AfterCheckpoint ~p",[S]),
	ok = actordb_driver:wal_rewind(Db,0, "create table tab1 (id INTEGER PRIMARY KEY, x TEXT);insert into tab1 values (1,'replaced');"),
	?debugFmt("After rewind+replace to 0=~p",[actordb_driver:actor_info("original",0)]),
	% Results are matched in reverse statement order: the sqlite_master query
	% (second statement) comes first in the expected list.
	{ok,[[{columns,{<<"name">>}},{rows,[{<<"tab1">>}]}],
	[{columns,{<<"id">>,<<"x">>}},{rows,[{1,<<"replaced">>}]}]]} =
		actordb_driver:exec_script("select * from tab1;select name from sqlite_master where type='table';",Db),
	ok.

% Checkpoint test with large rows: creates actor "ckpt_test" and hands over
% to checkpoint1/2, starting at event number 2 (1 was used for the CREATE TABLE).
checkpoint1() ->
	?INIT,
	?debugFmt("Checkpoint test",[]),
	application:ensure_all_started(crypto),
	{ok,Db} = actordb_driver:open("ckpt_test",1),
	Sql = "CREATE TABLE tab (id integer primary key, val text);",
	{ok,_} = actordb_driver:exec_script(Sql,Db,infinity,1,1,<<>>),
	checkpoint1(Db,2).
% Insert loop of the checkpoint test. For C = 2..999 it writes one row carrying
% 10 KiB of random data (base64 encoded) with event number C. For every C that is
% greater than 5 and divisible by 3 it also checkpoints up to C-5.
% Stops with ok once C reaches 1000.
checkpoint1(_Db,C) when C >= 1000 ->
	ok;
checkpoint1(Db,C) ->
	Sql = ["INSERT INTO tab VALUES (",integer_to_list(C),",'",base64:encode(crypto:strong_rand_bytes(1024*10)),"');"],
	{ok,_} = actordb_driver:exec_script(Sql,Db,infinity,1,C,<<>>),
	case C > 5 of
		true when C rem 3 == 0 ->
			ok = actordb_driver:checkpoint(Db,C-5);
		_ ->
			ok
	end,
	checkpoint1(Db,C+1).

% Continues an iteration started with actordb_driver:iterate_db/3.
%   Orig - connection being read
%   Iter - iterator returned by the previous iterate_db call
%   F    - raw file receiving the decompressed pages
%   Copy - connection receiving the pages through inject_page
% Repeats until iterate_db reports Done > 0. Any reply other than the five-element
% ok tuple crashes with case_clause, failing the test.
copy(Orig,Iter,F,Copy) ->
	case actordb_driver:iterate_db(Orig,Iter) of
		{ok,Iter1,Bin,Head,Done} ->
			% The page header starts with the event term and event number.
			% NOTE(review): the pattern was lost in this copy of the file; two 64-bit
			% fields followed by the rest of the header is a reconstruction - confirm
			% against the header layout written by the driver.
			<<Evterm:64,Evnum:64,_/binary>> = Head,
			?debugFmt("pages=~pB, evterm=~p, evnum=~p",[byte_size(Bin), Evterm, Evnum]),
			ok = actordb_driver:inject_page(Copy,Bin,Head),
			file:write(F,actordb_driver:lz4_decompress(Bin,4096)),
			case Done > 0 of
				true ->
					ok;
				_ ->
					copy(Orig,Iter1,F,Copy)
			end
	end.


% Runs one very large transaction: schema creation, a handful of small inserts,
% a 10 MiB incompressible insert that is deleted again before the savepoint is
% released, and two parameterized inserts executed once per parameter row.
% The per-statement results come back newest first (see Res), and the remaining
% rows are verified with plain and parameterized reads.
% bigtrans_check/0 later reopens "big.db".
bigtrans() ->
	?INIT,
	application:ensure_all_started(crypto),
	?debugFmt("Generating large sql",[]),
	Sql = [<<"SAVEPOINT 'adb';",
	"CREATE TABLE IF NOT EXISTS __transactions (id INTEGER PRIMARY KEY, tid INTEGER, updater INTEGER, node TEXT,",
	"schemavers INTEGER, sql TEXT);",
	"CREATE TABLE IF NOT EXISTS __adb (id INTEGER PRIMARY KEY, val TEXT);",
	"CREATE TABLE t_task ( id INTEGER NOT NULL, project_id INTEGER NOT NULL, group_id INTEGER NOT NULL, owner_id TEXT NOT NULL,",
	" assignee_id TEXT, title TEXT NOT NULL, category TEXT NOT NULL, status TEXT NOT NULL, priority INTEGER NOT NULL, ",
	" created INTEGER NOT NULL, assigned INTEGER NOT NULL, deadline INTEGER NOT NULL, PRIMARY KEY(id)) WITHOUT ",
	" ROWID;",
	"CREATE TABLE t_comments ( id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, posted INTEGER NOT NULL, content TEXT NOT NULL,",
	" poster_id INTEGER NOT NULL, parent_id INTEGER);",
	"CREATE TABLE t_multimedia ( id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, mime TEXT NOT NULL,",
	" content BLOB, owner_id INTEGER NOT NULL);",
	"CREATE TABLE t_task_files ( file_id INTEGER PRIMARY KEY REFERENCES multimedia(id));",
	"CREATE TABLE t_comment_files ( file_id INTEGER PRIMARY KEY REFERENCES multimedia(id));",
	"CREATE TABLE t_history ( id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, prev_assignee_id INTEGER NOT NULL,",
	" prev_project_id INTEGER NOT NULL, prev_group_id INTEGER NOT NULL, moved INTEGER NOT NULL, info TEXT NOT NULL);",
	"CREATE TABLE t_followers ( id INTEGER PRIMARY KEY AUTOINCREMENT, user_id TEXT NOT NULL, joined INTEGER NOT NULL);",
	"INSERT INTO t_task (id,project_id,group_id,title,category,priority,owner_id,assignee_id,created,assigned",
	",deadline,status) VALUES (1015,1,1014,'Task','Sexy','low','bbb','bbb',1427955816,1427955816,100,'waiting');",
	"INSERT OR REPLACE INTO __adb (id,val) VALUES (1,'1');INSERT OR REPLACE INTO __adb (id,val) VALUES (9,'1');",
	"INSERT OR REPLACE INTO __adb (id,val) VALUES (3,'7');INSERT OR REPLACE INTO __adb (id,val) VALUES (4,'task');",
	"INSERT OR REPLACE INTO __adb (id,val) VALUES (1,'0');INSERT OR REPLACE INTO __adb (id,val) VALUES (9,'0');",
	"INSERT OR REPLACE INTO __adb (id,val) VALUES (7,'614475188');">>,
	"INSERT INTO __adb (id,val) VALUES (10,'",base64:encode(crypto:strong_rand_bytes(1024*1024*10)),"');", % worst case scenario, incompressible data
	"INSERT INTO __adb (id,val) VALUES (?1, ?2);",
	"INSERT INTO __adb (id,val) VALUES (?1, ?2);",
	"DELETE from __adb where id=10;",
	"RELEASE SAVEPOINT 'adb';"],
	?debugFmt("Running large sql",[]),
	{ok,Db} = actordb_driver:open("big.db"),
	% One list of parameter rows per parameterized statement, in statement order:
	% three rows for the first "?1, ?2" insert, two for the second.
	Param = [[[111,"fromparam1"],[222,"fromparam2"],[333,"fromparam3"]],
		[[444,"secondstat"],[555,"secondstatement"]]],
	Res = [{changes,555,1},{changes,555,1},{changes,555,1},{changes,444,1},{changes,333,1},{changes,222,1},
	{changes,111,1},{changes,10,1},{changes,7,1},{changes,9,1},{changes,1,1},{changes,4,1},{changes,3,1},
	{changes,9,1},{changes,1,1},{changes,0,1},{changes,0,0},{changes,0,0},{changes,0,0},{changes,0,0},
	{changes,0,0},{changes,0,0},{changes,0,0},{changes,0,0},{changes,0,0},{changes,0,0}],
	{ok,Res} = actordb_driver:exec_script(Sql,Param,Db),

	% Row 10 (the 10 MiB value) must be gone; everything else must be present.
	SR = {ok,[[{columns,{<<"id">>,<<"val">>}},{rows,[{555,<<"secondstatement">>},
	{444,<<"secondstat">>},{333,<<"fromparam3">>},{222,<<"fromparam2">>},{111,<<"fromparam1">>},
	{9,<<"0">>},{7,<<"614475188">>},{4,<<"task">>},{3,<<"7">>},{1,<<"0">>}]}]]},
	SR = ?READ("SELECT * FROM __adb;",Db),
	?debugFmt("select=~p",[SR]),

	% Two parameterized selects with two parameter rows each; the four results are
	% expected in reverse order of execution.
	SR2 = [[{columns,{<<"id">>,<<"val">>}},{rows,[{555,<<"secondstatement">>}]}],
	[{columns,{<<"id">>,<<"val">>}},{rows,[{444,<<"secondstat">>}]}],
	[{columns,{<<"id">>,<<"val">>}},{rows,[{9,<<"0">>}]}],
	[{columns,{<<"id">>,<<"val">>}},{rows,[{3,<<"7">>}]}]],
	{ok,SR2} = ?READ(["SELECT * FROM __adb where id=?1;","SELECT * FROM __adb where id=?1;"],[[[3],[9]],[[444],[555]]],Db),
	?debugFmt("Double param select=~p",[SR2]).

% Unloads and re-initialises the driver, reopens "big.db" written by bigtrans/0
% and prints what a select returns.
% NOTE(review): the result is only printed, not compared against the expected rows.
bigtrans_check() ->
	?debugFmt("Reload and checking if all still there!",[]),
	file:copy("drv_nonode.txt","prev_drv_nonode.txt"),
	garbage_collect(),
	code:delete(actordb_driver_nif),
	code:purge(actordb_driver_nif),
	false = code:is_loaded(actordb_driver_nif),
	?INIT,

	Sql = "select * from __adb;",
	{ok,Db2} = actordb_driver:open("big.db"),
	R = ?READ(Sql,Db2),
	?debugFmt("~p",[R]),
	ok.

--------------------------------------------------------------------------------
/c_src/lz4.h:
--------------------------------------------------------------------------------
/*
   LZ4 - Fast LZ compression algorithm
   Header File
   Copyright (C) 2011-2015, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 source repository : https://github.com/Cyan4973/lz4
   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once

#if defined (__cplusplus)
extern "C" {
#endif

/*
 * lz4.h provides block compression functions, and gives full buffer control to programmer.
 * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
 * and can let the library handle its own memory, please use lz4frame.h instead.
*/

/**************************************
*  Version
**************************************/
#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
int LZ4_versionNumber (void);

/**************************************
*  Tuning parameter
**************************************/
/*
 * LZ4_MEMORY_USAGE :
 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
 * Increasing memory usage improves compression ratio
 * Reduced memory usage can improve speed, due to cache effect
 * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
 */
#define LZ4_MEMORY_USAGE 14


/**************************************
*  Simple Functions
**************************************/

int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);

/*
LZ4_compress_default() :
    Compresses 'sourceSize' bytes from buffer 'source'
    into already allocated 'dest' buffer of size 'maxDestSize'.
    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
    It also runs faster, so it's a recommended setting.
    If the function cannot compress 'source' into a more limited 'dest' budget,
    compression stops *immediately*, and the function result is zero.
    As a consequence, 'dest' content is not valid.
    This function never writes outside 'dest' buffer, nor reads outside 'source' buffer.
        sourceSize  : Max supported value is LZ4_MAX_INPUT_SIZE
        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
        return : the number of bytes written into buffer 'dest' (necessarily <= maxDestSize)
              or 0 if compression fails

LZ4_decompress_safe() :
    compressedSize : is the precise full size of the compressed block.
    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
             If destination buffer is not large enough, decoding will stop and output an error code (<0).
             If the source stream is detected malformed, the function will stop decoding and return a negative result.
             This function is protected against buffer overflow exploits, including malicious data packets.
             It never writes outside output buffer, nor reads outside input buffer.
*/


/**************************************
*  Advanced Functions
**************************************/
#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)

/*
LZ4_compressBound() :
    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
    This function is primarily useful for memory allocation purposes (destination buffer size).
    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
    Note that LZ4_compress_default() compresses faster when dest buffer size is >= LZ4_compressBound(srcSize)
        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
        return : maximum output size in a "worst case" scenario
              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
*/
int LZ4_compressBound(int inputSize);

/*
LZ4_compress_fast() :
    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
    An acceleration value of "1" is the same as regular LZ4_compress_default()
    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
*/
int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);


/*
LZ4_compress_fast_extState() :
    Same compression function, just using an externally allocated memory space to store compression state.
    Use LZ4_sizeofState() to know how much memory must be allocated,
    and allocate it on 8-bytes boundaries (using malloc() typically).
    Then, provide it as 'void* state' to compression function.
*/
int LZ4_sizeofState(void);
int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);


/*
LZ4_compress_destSize() :
    Reverse the logic, by compressing as much data as possible from 'source' buffer
    into already allocated buffer 'dest' of size 'targetDestSize'.
    This function either compresses the entire 'source' content into 'dest' if it's large enough,
    or fills 'dest' buffer completely with as much data as possible from 'source'.
        *sourceSizePtr : will be modified to indicate how many bytes were read from 'source' to fill 'dest'.
                         New value is necessarily <= old value.
        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
              or 0 if compression fails
*/
int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);


/*
LZ4_decompress_fast() :
    originalSize : is the original and therefore uncompressed size
    return : the number of bytes read from the source buffer (in other words, the compressed size)
             If the source stream is detected malformed, the function will stop decoding and return a negative result.
             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
    note : This function fully respects memory boundaries for properly formed compressed data.
           It is a bit faster than LZ4_decompress_safe().
           However, it does not provide any protection against intentionally modified data stream (malicious input).
           Use this function in trusted environment only (data to decode comes from a trusted source).
*/
int LZ4_decompress_fast (const char* source, char* dest, int originalSize);

/*
LZ4_decompress_safe_partial() :
    This function decompresses a compressed block of size 'compressedSize' at position 'source'
    into destination buffer 'dest' of size 'maxDecompressedSize'.
    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
    reducing decompression time.
    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
             Always control how many bytes were decoded.
             If the source stream is detected malformed, the function will stop decoding and return a negative result.
             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
*/
int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);


/***********************************************
*  Streaming Compression Functions
***********************************************/
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
/*
 * LZ4_stream_t
 * information structure to track an LZ4 stream.
 * important : init this structure content before first use !
 * note : only allocate the structure directly if you are statically linking LZ4
 *        If you are using liblz4 as a DLL, please use below construction methods instead.
 */
typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;

/*
 * LZ4_resetStream
 * Use this function to init an allocated LZ4_stream_t structure
 */
void LZ4_resetStream (LZ4_stream_t* streamPtr);

/*
 * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
 * LZ4_freeStream releases its memory.
 * In the context of a DLL (liblz4), please use these methods rather than the static struct.
 * They are more future proof, in case of a change of LZ4_stream_t size.
 */
LZ4_stream_t* LZ4_createStream(void);
int           LZ4_freeStream (LZ4_stream_t* streamPtr);

/*
 * LZ4_loadDict
 * Use this function to load a static dictionary into LZ4_stream.
 * Any previous data will be forgotten, only 'dictionary' will remain in memory.
 * Loading a size of 0 is allowed.
 * Return : dictionary size, in bytes (necessarily <= 64 KB)
 */
int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);

/*
 * LZ4_compress_fast_continue
 * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
 * Important : Previous data blocks are assumed to still be present and unmodified !
 * 'dst' buffer must be already allocated.
 * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
 */
int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);

/*
 * LZ4_saveDict
 * If previously compressed data block is not guaranteed to remain available at its memory location
 * save it into a safer place (char* safeBuffer)
 * Note : you don't need to call LZ4_loadDict() afterwards,
 *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
 * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
 */
int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);


/************************************************
*  Streaming Decompression Functions
************************************************/

#define LZ4_STREAMDECODESIZE_U64  4
#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
/*
 * LZ4_streamDecode_t
 * information structure to track an LZ4 stream.
 * init this structure content using LZ4_setStreamDecode or memset() before first use !
 *
 * In the context of a DLL (liblz4) please prefer usage of construction methods below.
 * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
 * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
 * LZ4_freeStreamDecode releases its memory.
 */
LZ4_streamDecode_t* LZ4_createStreamDecode(void);
int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);

/*
 * LZ4_setStreamDecode
 * Use this function to instruct where to find the dictionary.
 * Setting a size of 0 is allowed (same effect as reset).
 * Return : 1 if OK, 0 if error
 */
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);

/*
*_continue() :
    These decoding functions allow decompression of multiple blocks in "streaming" mode.
    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
    In the case of a ring buffer, decoding buffer must be either :
    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
      In which case, encoding and decoding buffers do not need to be synchronized,
      and encoding ring buffer can have any size, including small ones ( < 64 KB).
    - _At least_ 64 KB + 8 bytes + maxBlockSize.
      In which case, encoding and decoding buffers do not need to be synchronized,
      and encoding ring buffer can have any size, including larger than decoding buffer.
    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
    and indicate where it is saved using LZ4_setStreamDecode()
*/
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);


/*
Advanced decoding functions :
*_usingDict() :
    These decoding functions work the same as
    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
    They are stand-alone. They neither need nor update an LZ4_streamDecode_t structure.
*/
int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);



/**************************************
*  Obsolete Functions
**************************************/
/* Deprecate Warnings */
/* Should these warnings messages be a problem,
   it is generally possible to disable them,
   with -Wno-deprecated-declarations for gcc
   or _CRT_SECURE_NO_WARNINGS in Visual for example.
   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
#  elif (LZ4_GCC_VERSION >= 301)
#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
#  elif defined(_MSC_VER)
#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
#  else
#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
#    define LZ4_DEPRECATED(message)
#  endif
#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */

/* Obsolete compression functions */
/* These functions are planned to start generating warnings by r131 approximately */
int LZ4_compress               (const char* source, char* dest, int sourceSize);
int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);

/* Obsolete decompression functions */
/* These function names are completely deprecated and must no longer be used.
   They are only provided here for compatibility with older programs.
    - LZ4_uncompress is the same as LZ4_decompress_fast
    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
   These function prototypes are now disabled; uncomment them only if you really need them.
   It is highly recommended to stop using these prototypes and migrate to maintained ones */
/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */

/* Obsolete streaming functions; use new streaming interface whenever possible */
LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);

/* Obsolete streaming decoding functions */
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);


#if defined (__cplusplus)
}
#endif

--------------------------------------------------------------------------------
/c_src/tool.c:
--------------------------------------------------------------------------------
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // #define _TESTDBG_ 1 5 | #ifdef __linux__ 6 | #define _GNU_SOURCE 1 7 | #include 8 | #include 9 | #include 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #ifndef _WIN32 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #else 28 | #include 29 | #include 30 | #endif 31 | 32 | // info|pages|log|actors 33 | #define PRINT_PAGES 1 34 | #define PRINT_INFO 2 35 | #define PRINT_LOG 4 36 | #define PRINT_ACTORS 8 37 | 38 | // Directly include sqlite3.c 39 | // This way we are sure the included version of sqlite3 is actually used. 40 | // If we were to just include "sqlite3.h" OSX would actually use /usr/lib/libsqlite3.dylib 41 | #define SQLITE_API static 42 | #define SQLITE_EXTERN static 43 | #include "sqlite3.c" 44 | 45 | 46 | #include "actordb_driver_nif.h" 47 | #include "lz4.h" 48 | 49 | static __thread db_thread *g_tsd_thread; 50 | static __thread priv_data *g_tsd_pd; 51 | static __thread db_connection *g_tsd_conn; 52 | static __thread mdbinf *g_tsd_wmdb; 53 | priv_data *g_pd; 54 | 55 | static void lock_wtxn(int env){} 56 | 57 | // wal.c code has been taken out of sqlite3.c and placed in wal.c file. 58 | // Every wal interface function is changed, but the wal-index code remains unchanged. 
59 | #include "wal.c" 60 | 61 | 62 | typedef struct lmdb 63 | { 64 | MDB_env *menv; 65 | MDB_txn *txn; 66 | MDB_dbi infodb; 67 | MDB_dbi logdb; 68 | MDB_dbi pagesdb; 69 | MDB_dbi actorsdb; 70 | MDB_cursor *cursorLog; 71 | MDB_cursor *cursorPages; 72 | MDB_cursor *cursorInfo; 73 | MDB_cursor *cursorActors; 74 | }lmdb; 75 | 76 | static size_t file_size(const char *pth) 77 | { 78 | #ifdef _WIN32 79 | HANDLE h = CreateFile(pth, 80 | GENERIC_READ, 81 | FILE_SHARE_READ, 82 | NULL, 83 | OPEN_EXISTING, 84 | FILE_ATTRIBUTE_NORMAL, 85 | NULL); 86 | if (h == INVALID_HANDLE_VALUE) 87 | { 88 | DWORD err = GetLastError(); 89 | if (err == 32) 90 | { 91 | printf("Can not open file because it is used by another process.\n"); 92 | } 93 | return 0; 94 | } 95 | else 96 | { 97 | LARGE_INTEGER fs; 98 | GetFileSizeEx(h,&fs); 99 | CloseHandle(h); 100 | return fs.QuadPart; 101 | } 102 | #else 103 | size_t sz; 104 | FILE *file = fopen(pth,"rb"); 105 | if (!file) 106 | return 0; 107 | 108 | fseek(file, 0L, SEEK_END); 109 | sz = ftell(file); 110 | fseek(file, 0L, SEEK_SET); 111 | fclose(file); 112 | 113 | return sz; 114 | #endif 115 | } 116 | 117 | static int open_env(lmdb *lm, const char *pth, int flags) 118 | { 119 | // #if defined(__APPLE__) || defined(_WIN32) 120 | // u64 dbsize = 4096*1024*1024LL; 121 | // #else 122 | // // 1TB def size on linux 123 | // u64 dbsize = 4096*1024*1024*128*2LL; 124 | // #endif 125 | int rc; 126 | size_t dbsize = 0; 127 | 128 | if (!flags) 129 | { 130 | dbsize = file_size(pth) + 4096*100; 131 | } 132 | 133 | if ((rc = mdb_env_create(&lm->menv)) != MDB_SUCCESS) 134 | return rc; 135 | if ((rc = mdb_env_set_maxdbs(lm->menv,5)) != MDB_SUCCESS) 136 | return rc; 137 | if (dbsize) 138 | { 139 | if (mdb_env_set_mapsize(lm->menv,dbsize) != MDB_SUCCESS) 140 | return -1; 141 | } 142 | if ((rc = mdb_env_open(lm->menv, pth, MDB_NOSUBDIR | flags, 0664)) != MDB_SUCCESS) 143 | return rc; 144 | 145 | if ((rc = mdb_txn_begin(lm->menv, NULL, flags, &lm->txn)) != MDB_SUCCESS) 
146 | return rc; 147 | 148 | if ((rc = mdb_dbi_open(lm->txn, "info", MDB_INTEGERKEY, &lm->infodb)) != MDB_SUCCESS) 149 | return rc; 150 | if ((rc = mdb_dbi_open(lm->txn, "actors", MDB_CREATE, &lm->actorsdb)) != MDB_SUCCESS) 151 | return rc; 152 | if ((rc = mdb_dbi_open(lm->txn, "log", MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &lm->logdb)) != MDB_SUCCESS) 153 | return rc; 154 | if ((rc = mdb_dbi_open(lm->txn, "pages", MDB_DUPSORT, &lm->pagesdb)) != MDB_SUCCESS) 155 | return rc; 156 | if (mdb_set_compare(lm->txn, lm->logdb, logdb_cmp) != MDB_SUCCESS) 157 | return -1; 158 | if (mdb_set_compare(lm->txn, lm->pagesdb, pagesdb_cmp) != MDB_SUCCESS) 159 | return -1; 160 | if (mdb_set_dupsort(lm->txn, lm->pagesdb, pagesdb_val_cmp) != MDB_SUCCESS) 161 | return -1; 162 | if (mdb_cursor_open(lm->txn, lm->logdb, &lm->cursorLog) != MDB_SUCCESS) 163 | return -1; 164 | if (mdb_cursor_open(lm->txn, lm->pagesdb, &lm->cursorPages) != MDB_SUCCESS) 165 | return -1; 166 | if (mdb_cursor_open(lm->txn, lm->infodb, &lm->cursorInfo) != MDB_SUCCESS) 167 | return -1; 168 | if (mdb_cursor_open(lm->txn, lm->actorsdb, &lm->cursorActors) != MDB_SUCCESS) 169 | return -1; 170 | 171 | return 0; 172 | } 173 | 174 | static void close_env(lmdb *lm) 175 | { 176 | if (lm->txn) 177 | mdb_txn_commit(lm->txn); 178 | if (lm->menv) 179 | mdb_env_close(lm->menv); 180 | } 181 | 182 | 183 | static int do_print(const char *pth, i64 actor, int what) 184 | { 185 | struct lmdb lm; 186 | MDB_val key, data; 187 | int rc, op; 188 | 189 | if ((rc = open_env(&lm, pth, MDB_RDONLY)) != 0) 190 | { 191 | printf("Unable to open lmdb environment %d\n",rc); 192 | return -1; 193 | } 194 | 195 | 196 | if (what & PRINT_ACTORS) 197 | { 198 | printf("-----------------------actorsdb--------------------------\n"); 199 | rc = mdb_cursor_get(lm.cursorActors,&key,&data,MDB_FIRST); 200 | while (rc == MDB_SUCCESS) 201 | { 202 | u64 index; 203 | memcpy(&index, data.mv_data, sizeof(u64)); 204 | printf("Actor=%.*s, 
id=%llu\n",(int)key.mv_size, (char*)key.mv_data, index); 205 | rc = mdb_cursor_get(lm.cursorActors,&key,&data,MDB_NEXT); 206 | } 207 | } 208 | 209 | if (what & PRINT_LOG) 210 | { 211 | printf("-----------------------logdb--------------------------\n"); 212 | rc = mdb_cursor_get(lm.cursorLog,&key,&data,MDB_FIRST); 213 | while (rc == MDB_SUCCESS) 214 | { 215 | u64 index, term, num; 216 | memcpy(&index, key.mv_data, sizeof(u64)); 217 | memcpy(&term, (u8*)key.mv_data + sizeof(u64), sizeof(u64)); 218 | memcpy(&num, (u8*)key.mv_data + sizeof(u64)*2, sizeof(u64)); 219 | if (actor == -1 || actor == index) 220 | { 221 | printf("logdb: actor=%llu, term=%llu, evnum=%llu\n",index, term,num); 222 | 223 | op = MDB_FIRST_DUP; 224 | while ((rc = mdb_cursor_get(lm.cursorLog,&key,&data, op)) == MDB_SUCCESS) 225 | { 226 | u32 pgno; 227 | memcpy(&pgno,data.mv_data,sizeof(u32)); 228 | printf(" pgno=%u\n",pgno); 229 | op = MDB_NEXT_DUP; 230 | } 231 | } 232 | rc = mdb_cursor_get(lm.cursorLog,&key,&data,MDB_NEXT_NODUP); 233 | } 234 | } 235 | 236 | if (what & PRINT_PAGES) 237 | { 238 | printf("-----------------------pagesdb--------------------------\n"); 239 | rc = mdb_cursor_get(lm.cursorPages,&key,&data,MDB_FIRST); 240 | while (rc == MDB_SUCCESS) 241 | { 242 | u64 index; 243 | u32 pgno; 244 | size_t ndupl; 245 | 246 | memcpy(&index, key.mv_data, sizeof(u64)); 247 | memcpy(&pgno, (u8*)key.mv_data + sizeof(u64), sizeof(u32)); 248 | 249 | if (actor == -1 || actor == index) 250 | { 251 | printf("pagesdb: actor=%llu, pgno=%u\n",index, pgno); 252 | mdb_cursor_count(lm.cursorPages,&ndupl); 253 | 254 | op = MDB_FIRST_DUP; 255 | while ((rc = mdb_cursor_get(lm.cursorPages,&key,&data, op)) == MDB_SUCCESS) 256 | { 257 | u64 term,num; 258 | u8 frag; 259 | memcpy(&term, data.mv_data, sizeof(u64)); 260 | memcpy(&num, (u8*)data.mv_data + sizeof(u64), sizeof(u64)); 261 | frag = *(u8*)((u8*)data.mv_data + sizeof(u64)*2); 262 | printf(" evterm=%lld, evnum=%lld, frag=%d, 
pgsize=%ld\n",term,num,(int)frag,data.mv_size-sizeof(u64)*2-1); 263 | 264 | op = MDB_NEXT_DUP; 265 | ndupl--; 266 | if (ndupl == 0) 267 | break; 268 | } 269 | } 270 | rc = mdb_cursor_get(lm.cursorPages,&key,&data,MDB_NEXT); 271 | } 272 | } 273 | 274 | if (what & PRINT_INFO) 275 | { 276 | printf("-----------------------infodb--------------------------\n"); 277 | rc = mdb_cursor_get(lm.cursorInfo, &key, &data, MDB_FIRST); 278 | while (rc == MDB_SUCCESS) 279 | { 280 | u8 v; 281 | u64 index, fTerm, fEvnum, lTerm, lEvnum, iTerm, iEvnum; 282 | u32 mxPage,allPages; 283 | 284 | memcpy(&index, key.mv_data, sizeof(u64)); 285 | if (actor == -1 || actor == index) 286 | { 287 | v = *(u8*)(data.mv_data); 288 | memcpy(&fTerm, (u8*)data.mv_data+1, sizeof(u64)); 289 | memcpy(&fEvnum, (u8*)data.mv_data+1+sizeof(u64), sizeof(u64)); 290 | memcpy(&lTerm, (u8*)data.mv_data+1+sizeof(u64)*2, sizeof(u64)); 291 | memcpy(&lEvnum, (u8*)data.mv_data+1+sizeof(u64)*3, sizeof(u64)); 292 | memcpy(&iTerm, (u8*)data.mv_data+1+sizeof(u64)*4, sizeof(u64)); 293 | memcpy(&iEvnum, (u8*)data.mv_data+1+sizeof(u64)*5, sizeof(u64)); 294 | memcpy(&mxPage, (u8*)data.mv_data+1+sizeof(u64)*6, sizeof(u32)); 295 | memcpy(&allPages, (u8*)data.mv_data+1+sizeof(u64)*6+sizeof(u32), sizeof(u32)); 296 | 297 | printf("actor=%llu, firstTerm=%llu, firstEvnum=%llu, lastTerm=%llu, lastEvnum=%llu," 298 | "inprogTerm=%llu, inprogEvnum=%llu, mxPage=%u, allPages=%u\n", 299 | index,fTerm,fEvnum,lTerm,lEvnum,iTerm,iEvnum,mxPage,allPages); 300 | } 301 | 302 | rc = mdb_cursor_get(lm.cursorInfo, &key, &data, MDB_NEXT); 303 | } 304 | } 305 | close_env(&lm); 306 | return 0; 307 | } 308 | 309 | // static void sighandle(int sig) 310 | // { 311 | // } 312 | 313 | static int do_backup(const char *src, const char *dst, u8 doCompact) 314 | { 315 | MDB_val key, data; 316 | lmdb rd, wr; 317 | int rc; 318 | int flags = 0; 319 | 320 | #ifdef _WIN32 321 | flags = MDB_CP_COMPACT; 322 | #endif 323 | if (doCompact) 324 | flags = MDB_CP_COMPACT; 325 
| // #ifdef SIGPIPE 326 | // signal(SIGPIPE, sighandle); 327 | // #endif 328 | // #ifdef SIGHUP 329 | // signal(SIGHUP, sighandle); 330 | // #endif 331 | // signal(SIGINT, sighandle); 332 | // signal(SIGTERM, sighandle); 333 | 334 | memset(&rd,0,sizeof(rd)); 335 | memset(&wr,0,sizeof(wr)); 336 | 337 | // If destination does not exist, do a simple complete copy 338 | if (file_size(dst) == 0) 339 | { 340 | int rc; 341 | 342 | if (mdb_env_create(&rd.menv) != MDB_SUCCESS) 343 | return -1; 344 | if (mdb_env_open(rd.menv, src, MDB_NOSUBDIR | MDB_RDONLY, 0600) != MDB_SUCCESS) 345 | return -1; 346 | 347 | if (strcmp(dst, "-") == 0) 348 | rc = mdb_env_copyfd(rd.menv, 1); 349 | else 350 | rc = mdb_env_copy2(rd.menv, dst, flags); 351 | if (rc != 0) 352 | fprintf(stderr,"Backup failed %s\n",strerror(rc)); 353 | // goto bckp_done; 354 | } 355 | mdb_txn_commit(rd.txn); 356 | close_env(&rd); 357 | 358 | // if (open_env(&rd, src, MDB_RDONLY) == -1) 359 | // { 360 | // printf("Unable to open source environment\n"); 361 | // return -1; 362 | // } 363 | if ((rc = open_env(&wr, dst, 0)) != 0) 364 | { 365 | printf("Unable to open destination environment %d\n",rc); 366 | return -1; 367 | } 368 | 369 | // Environment has been copied over, now delete state. 370 | // This means cluster configuration (nodes, schema, etc.). 371 | // This way this lmdb file can be used in an entirely different setup. 
372 | 373 | rc = mdb_cursor_get(wr.cursorActors,&key,&data,MDB_FIRST); 374 | if (rc != MDB_SUCCESS) 375 | goto bckp_done; 376 | do 377 | { 378 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 379 | MDB_val pgKey, pgVal, logKey, logVal; 380 | u32 pgno = 1; 381 | int pgop = MDB_SET; 382 | size_t nmsz = key.mv_size; 383 | char *nm = (char*)key.mv_data; 384 | u64 index,firstCompleteTerm,firstCompleteEvnum; 385 | u8 logKeyBuf[sizeof(u64)*3]; 386 | 387 | // skip actor data 388 | if (nm[0] == '?') 389 | continue; 390 | else if (key.mv_size > 7 && strncmp(nm, "actors/",7) == 0) 391 | continue; 392 | else if (key.mv_size > 7 && strncmp(nm, "shards/",7) == 0) 393 | continue; 394 | 395 | memcpy(&index, data.mv_data, sizeof(u64)); 396 | // printf("Check key %llu\n",index); 397 | key.mv_size = sizeof(index); 398 | key.mv_data = &index; 399 | rc = mdb_get(wr.txn,wr.infodb,&key,&data); 400 | if (rc != MDB_SUCCESS) 401 | { 402 | // printf("No info for: %.*s\n", (int)nmsz, nm); 403 | continue; 404 | } 405 | if (strncmp(nm,"state/global.__state__",nmsz) == 0) 406 | { 407 | MDB_val rnmKey, rnmVal; 408 | 409 | // Delete this entry in actors 410 | mdb_cursor_del(wr.cursorActors,0); 411 | 412 | rnmKey.mv_size = strlen("globalbckp"); 413 | rnmKey.mv_data = "globalbckp"; 414 | rnmVal.mv_size = sizeof(index); 415 | rnmVal.mv_data = &index; 416 | 417 | // Store it under globalbckp 418 | mdb_put(wr.txn, wr.actorsdb, &rnmKey, &rnmVal, 0); 419 | continue; 420 | } 421 | memcpy(&firstCompleteTerm, ((u8*)data.mv_data)+1, sizeof(u64)); 422 | memcpy(&firstCompleteEvnum,((u8*)data.mv_data)+1+sizeof(u64), sizeof(u64)); 423 | 424 | memcpy(pagesKeyBuf, &index,sizeof(u64)); 425 | memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 426 | pgKey.mv_data = pagesKeyBuf; 427 | pgKey.mv_size = sizeof(pagesKeyBuf); 428 | while (mdb_cursor_get(wr.cursorPages,&pgKey,&pgVal,pgop) == MDB_SUCCESS) 429 | { 430 | u64 aindex; 431 | memcpy(&aindex,pgKey.mv_data,sizeof(u64)); 432 | if (aindex != index) 433 | break; 434 | 
mdb_cursor_del(wr.cursorPages, MDB_NODUPDATA); 435 | pgop = MDB_NEXT_NODUP; 436 | } 437 | 438 | memcpy(logKeyBuf, &index, sizeof(u64)); 439 | memcpy(logKeyBuf + sizeof(u64), &firstCompleteTerm, sizeof(u64)); 440 | memcpy(logKeyBuf + sizeof(u64)*2, &firstCompleteEvnum,sizeof(u64)); 441 | logKey.mv_data = logKeyBuf; 442 | logKey.mv_size = sizeof(logKeyBuf); 443 | if (mdb_cursor_get(wr.cursorLog,&logKey,&logVal,MDB_SET) == MDB_SUCCESS) 444 | { 445 | u64 aindex; 446 | 447 | rc = mdb_cursor_del(wr.cursorLog, MDB_NODUPDATA); 448 | while ((mdb_cursor_get(wr.cursorLog,&logKey,&logVal,MDB_NEXT_NODUP)) == MDB_SUCCESS) 449 | { 450 | memcpy(&aindex, logKey.mv_data, sizeof(u64)); 451 | if (index != aindex) 452 | break; 453 | rc = mdb_cursor_del(wr.cursorLog, MDB_NODUPDATA); 454 | } 455 | } 456 | 457 | // delete state (ranges,catchup,state/..) 458 | mdb_cursor_del(wr.cursorActors,0); 459 | }while ((rc = mdb_cursor_get(wr.cursorActors,&key,&data,MDB_NEXT)) == MDB_SUCCESS); 460 | mdb_txn_commit(wr.txn); 461 | wr.txn = NULL; 462 | 463 | // 464 | // Should we support incremental backup from here? It may actually be slower since it involves checking 465 | // every actor if any pages have changed, if so doing inject/rewind. 
466 | // 467 | 468 | bckp_done: 469 | close_env(&wr); 470 | return 0; 471 | } 472 | 473 | // static int do_checkpoint(const char *pth) 474 | // { 475 | // lmdb rd; 476 | // u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 477 | // MDB_val pgKey = {0,NULL}, pgVal = {0,NULL}; 478 | // u64 evnum,evterm,aindex = 126; 479 | // u32 pgno = 65; 480 | // int mrc; 481 | 482 | // memcpy(pagesKeyBuf, &aindex,sizeof(u64)); 483 | // memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 484 | // pgKey.mv_data = pagesKeyBuf; 485 | // pgKey.mv_size = sizeof(pagesKeyBuf); 486 | 487 | // if (open_env(&rd, pth, 0) == -1) 488 | // { 489 | // fprintf(stderr,"Unable to open source environment\n"); 490 | // return -1; 491 | // } 492 | 493 | // if (mdb_cursor_get(rd.cursorPages,&pgKey,&pgVal,MDB_SET) != MDB_SUCCESS) 494 | // { 495 | // return 0; 496 | // } 497 | // if (mdb_cursor_get(rd.cursorPages,&pgKey,&pgVal,MDB_FIRST_DUP) != MDB_SUCCESS) 498 | // return 0; 499 | 500 | // do{ 501 | // MDB_val pgDelKey = {0,NULL}, pgDelVal = {0,NULL}; 502 | // mdb_cursor_get(rd.cursorPages,&pgDelKey,&pgDelVal,MDB_GET_CURRENT); 503 | // memcpy(&evterm, pgDelVal.mv_data, sizeof(u64)); 504 | // memcpy(&evnum, (u8*)pgDelVal.mv_data+sizeof(u64),sizeof(u64)); 505 | // DBG("next %llu",evnum); 506 | // // if (evnum < 5538) //5654 507 | // if (evnum == 5538) 508 | // { 509 | // mrc = mdb_cursor_del(rd.cursorPages,0); 510 | // if (mrc != MDB_SUCCESS) 511 | // { 512 | // DBG("Unable to delete page on cursor! 
err=%d, evnum=%llu",mrc,evnum); 513 | // return 1; 514 | // } 515 | // DBG("Deleted"); 516 | // } 517 | 518 | // }while (mdb_cursor_get(rd.cursorPages,&pgKey,&pgVal,MDB_NEXT_DUP) == MDB_SUCCESS); 519 | // mdb_txn_commit(rd.txn); 520 | // return 0; 521 | // } 522 | 523 | static int do_extract(const char *pth, const char *actor, const char *type, const char *dst) 524 | { 525 | lmdb rd; 526 | FILE *f; 527 | iterate_resource iter; 528 | u8 buf[PAGE_BUFF_SIZE]; 529 | u8 hdrbuf[sizeof(u64)*2+sizeof(u32)*2]; 530 | u32 done = 0; 531 | db_connection conn; 532 | int nfilled; 533 | db_thread thr; 534 | char actorpth[512]; 535 | int i, rc; 536 | 537 | memset(&iter,0,sizeof(iterate_resource)); 538 | memset(&thr,0,sizeof(db_thread)); 539 | memset(&conn,0,sizeof(db_connection)); 540 | 541 | if ((rc = open_env(&rd, pth, MDB_RDONLY)) != 0) 542 | { 543 | fprintf(stderr,"Unable to open source environment\n"); 544 | return -1; 545 | } 546 | g_tsd_thread = &thr; 547 | g_tsd_conn = &conn; 548 | 549 | thr.mdb.env = rd.menv; 550 | thr.maxvalsize = mdb_env_get_maxkeysize(rd.menv); 551 | thr.resFrames = alloca((SQLITE_DEFAULT_PAGE_SIZE/thr.maxvalsize + 1)*sizeof(MDB_val)); 552 | thr.mdb.infodb = rd.infodb; 553 | thr.mdb.logdb = rd.logdb; 554 | thr.mdb.pagesdb = rd.pagesdb; 555 | thr.mdb.actorsdb = rd.actorsdb; 556 | thr.mdb.txn = rd.txn; 557 | thr.mdb.cursorLog = rd.cursorLog; 558 | thr.mdb.cursorPages = rd.cursorPages; 559 | thr.mdb.cursorInfo = rd.cursorInfo; 560 | thr.isreadonly = 1; 561 | 562 | // conn.wal.thread = &thr; 563 | // conn.wal.rthread = &thr; 564 | // conn.wal.rthreadId = pthread_self(); 565 | 566 | 567 | for (i = 0; i < 10; i++) 568 | { 569 | if (strcmp("termstore",actor) == 0 && strcmp("termstore",type) == 0) 570 | sprintf(actorpth,"termstore"); 571 | else if (i == 0) 572 | sprintf(actorpth,"actors/%s.%s",actor,type); 573 | else if (i == 1) 574 | sprintf(actorpth,"shards/%s.%s",actor,type); 575 | else if (i == 2) 576 | sprintf(actorpth,"state/%s.%s",actor,type); 577 | else 
if (i == 3) 578 | sprintf(actorpth,"actors/%s",actor); 579 | else if (i == 4) 580 | sprintf(actorpth,"shards/%s",actor); 581 | else if (i == 5) 582 | sprintf(actorpth,"%s",actor); 583 | else 584 | { 585 | printf("Can not find actor\n"); 586 | return -1; 587 | } 588 | 589 | if (sqlite3WalOpen(NULL, NULL, actorpth, 0, 0, NULL) == SQLITE_ERROR) 590 | continue; 591 | else 592 | break; 593 | } 594 | 595 | if (dst == NULL) 596 | { 597 | char nm[256]; 598 | sprintf(nm,"%s.%s",actor,type); 599 | f = fopen(nm,"wb"); 600 | } 601 | else if (strcmp(dst,"-") == 0) 602 | f = stdout; 603 | else 604 | f = fopen(dst,"wb"); 605 | 606 | if (f == NULL) 607 | { 608 | fprintf(stderr,"Unable to open destination file\n"); 609 | return -1; 610 | } 611 | 612 | while (!done) 613 | { 614 | nfilled = wal_iterate(&conn.wal, &iter, buf, PAGE_BUFF_SIZE, hdrbuf, &done); 615 | if (nfilled > 0) 616 | { 617 | u8 decompr[SQLITE_DEFAULT_PAGE_SIZE]; 618 | int rc; 619 | 620 | rc = LZ4_decompress_safe((char*)buf,(char*)decompr,nfilled,sizeof(decompr)); 621 | if (rc != SQLITE_DEFAULT_PAGE_SIZE) 622 | { 623 | fprintf(stderr,"Decompress bad size=%d\n",rc); 624 | return -1; 625 | } 626 | 627 | fwrite(decompr,1,sizeof(decompr),f); 628 | } 629 | } 630 | fclose(f); 631 | close_env(&rd); 632 | return 0; 633 | } 634 | 635 | 636 | int main(int argc, const char* argv[]) 637 | { 638 | priv_data pd; 639 | g_log = stdout; 640 | g_pd = &pd; 641 | 642 | memset(&pd, 0,sizeof(priv_data)); 643 | // pthread_key_create(&g_tsd_thread, NULL); 644 | 645 | if (argc >= 3 && strcmp(argv[1],"print") == 0) 646 | { 647 | i64 aindex = -1; 648 | int flag; 649 | const char *path; 650 | 651 | if (argc == 3 || strcmp(argv[2],"all") == 0) 652 | { 653 | flag = PRINT_ACTORS | PRINT_LOG | PRINT_INFO | PRINT_PAGES; 654 | path = argc == 3 ? 
argv[2] : argv[3]; 655 | } 656 | else 657 | { 658 | if (argc == 4) 659 | path = argv[3]; 660 | else if (argc == 5) 661 | { 662 | sscanf(argv[3],"%lld",&aindex); 663 | path = argv[4]; 664 | } 665 | 666 | if (strcmp(argv[2],"info") == 0) 667 | flag = PRINT_INFO; 668 | else if (strcmp(argv[2],"pages") == 0) 669 | flag = PRINT_PAGES; 670 | else if (strcmp(argv[2],"log") == 0) 671 | flag = PRINT_LOG; 672 | else if (strcmp(argv[2],"actors") == 0) 673 | flag = PRINT_ACTORS; 674 | } 675 | do_print(path, aindex, flag); 676 | } 677 | else if (argc == 4 && (strcmp(argv[1],"backup") == 0 || strcmp(argv[1],"compact") == 0)) 678 | { 679 | char ch = 'y'; 680 | 681 | if (file_size(argv[2]) == 0) 682 | { 683 | fprintf(stderr,"Source db empty\n"); 684 | return 0; 685 | } 686 | if (file_size(argv[3]) != 0) 687 | { 688 | fprintf(stderr,"Destination already exists\n"); 689 | return 0; 690 | } 691 | 692 | do_backup(argv[2],argv[3], strcmp(argv[1],"compact") == 0); 693 | } 694 | else if ((argc == 5 || argc == 6) && strcmp(argv[1],"extract") == 0) 695 | { 696 | if (file_size(argv[2]) == 0) 697 | { 698 | fprintf(stderr,"Source db empty\n"); 699 | return 0; 700 | } 701 | 702 | if (argc == 5) 703 | do_extract(argv[2],argv[3],argv[4],NULL); 704 | else 705 | do_extract(argv[2],argv[3],argv[4], argv[5]); 706 | } 707 | // else if (argc == 3 && strcmp(argv[1],"checkpoint") == 0) 708 | // { 709 | // do_checkpoint(argv[2]); 710 | // } 711 | else 712 | { 713 | printf("Backup:\n"); 714 | printf("%s backup src_lmdb dest_lmdb\n",argv[0]); 715 | printf("To backup to stdout, use -\n"); 716 | printf("\n"); 717 | printf("Backup and compact the lmdb file (requires more CPU and is slower):\n"); 718 | printf("%s compact src_lmdb dest_lmdb\n",argv[0]); 719 | printf("\n"); 720 | printf("Extract an individual actor to an sqlite file\n"); 721 | printf("%s extract /path/to/lmdb_file actorname actortype out_file\n",argv[0]); 722 | printf("\n"); 723 | printf("Diagnostic print DB structure. 
This can be a lot of data!\n"); 724 | printf("Actorindex is optional\n"); 725 | printf("%s print [all|info|pages|log|actors] [actorindex] /path/to/lmdb_file\n",argv[0]); 726 | printf("\n"); 727 | return 1; 728 | } 729 | 730 | return 1; 731 | } 732 | -------------------------------------------------------------------------------- /c_src/wal.c: -------------------------------------------------------------------------------- 1 | /* ActorDB -> sqlite -> lz4 -> REPLICATION pipe -> LMDB 2 | ** WAL is where sqlite ends and lmdb starts. 3 | 4 | ** LMDB schema: 5 | ** - Actors DB: {<>, <>} 6 | ** {"?",MaxInteger} -> when adding actors, increment this value 7 | 8 | ** - Pages DB: {<>, <>} 9 | ** Pages db is a dupsort database. It stores lz4 compressed sqlite pages. There can be multiple 10 | ** pages for one pgno. This is to leave room for replication. 11 | ** When a page is requested from sqlite, it will use the highest commited page. 12 | ** Once replication has run its course, old pages are deleted. 13 | ** Pages that are too large to be placed in a single value are added into multiple dupsort values. FragIndex 14 | ** counts down. If there are 3 pages, first frag will be FragIndex=2. 15 | 16 | ** - Log DB: {<>, <>} 17 | ** Also a dupsort. Every key is one sqlite write transaction. Values are a list of pages 18 | ** that have changed. 19 | 20 | ** - Info DB: {<>, <>} 24 | ** V (version) = 1 25 | ** FirstComplete(term/evnum) - First entry for actor in Log DB. 26 | ** LastComplete(term/evnum) - Last entry in log that is commited. 27 | ** InProgress(term/evnum) - pages from this evnum+evterm combination are not commited. If actor has just opened 28 | ** and it has these values set, it must delete pages to continue. 29 | ** CurrentTerm - raft parameter. This stores the highest term actor has seen 30 | ** VotedFor - which node actor voted for 31 | 32 | ** On writes log, pages and info db are updated. 
33 | ** Non-live replication is simply a matter of looking up log and sending the right pages. 34 | ** If stale replication or actor copy, simply traverse actor pages from 0 forward until reaching 35 | ** the end. 36 | 37 | ** Undo is a matter of checking the pages of last write in log db and deleting them in log and pages db. 38 | 39 | ** Endianess: Data is written as is. Practicaly no relevant platforms are in big endian and I can't see 40 | ** a scenario where a lmdb file would be moved between different endian platforms. 41 | 42 | ** Vacuum is not (and should) not be used. A DB that shrinks poses problems for replication. 43 | ** Replication completes with nTruncate set to mx page that we have replicated. This may be 44 | ** actual mxPage but will usually not be. If DB shrank with a write, we don't know if nTruncate is 45 | ** actually new mxPage or not. With regular write nTruncate will always be mxPage. 46 | */ 47 | static int checkpoint(Wal *pWal, u64 evnum); 48 | static int findframe(db_thread *thr, Wal *pWal, Pgno pgno, u32 *piRead, u64 limitTerm, u64 limitEvnum, u64 *outTerm, u64 *outEvnum); 49 | static int storeinfo(Wal *pWal, u64 currentTerm, u8 votedForSize, u8 *votedFor); 50 | static int doundo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx, u8 delPages); 51 | static u64 get8byte(u8* buf); 52 | static void put8byte(u8* buf, u64 num); 53 | static MDB_txn* open_txn(mdbinf *data, int flags); 54 | static int register_actor(u64 index, char *name); 55 | 56 | // 1. Figure out actor index, create one if it does not exist 57 | // 2. 
check info for evnum/evterm data 58 | int sqlite3WalOpen(sqlite3_vfs *pVfs, sqlite3_file *pDbFd, const char *zWalName,int bNoShm, i64 mxWalSize, Wal **ppWal) 59 | { 60 | MDB_val key, data; 61 | int rc; 62 | 63 | db_thread *thr = g_tsd_thread; 64 | db_connection *conn = g_tsd_conn; 65 | mdbinf * const mdb = &thr->mdb; 66 | Wal *pWal = &conn->wal; 67 | MDB_dbi actorsdb, infodb; 68 | MDB_txn *txn = mdb->txn; 69 | int offset = 0, cutoff = 0, nmLen = 0; 70 | 71 | if (!thr) 72 | return SQLITE_ERROR; 73 | 74 | actorsdb = mdb->actorsdb; 75 | infodb = mdb->infodb; 76 | 77 | if (zWalName[0] == '/') 78 | offset = 1; 79 | nmLen = strlen(zWalName+offset); 80 | if (zWalName[offset+nmLen-1] == 'l' && zWalName[offset+nmLen-2] == 'a' && 81 | zWalName[offset+nmLen-3] == 'w' && zWalName[offset+nmLen-4] == '-') 82 | cutoff = 4; 83 | 84 | DBG("Wal name=%s %lld",zWalName,(i64)txn); 85 | 86 | // shorten size to ignore "-wal" at the end 87 | key.mv_size = nmLen-cutoff; 88 | key.mv_data = (void*)(zWalName+offset);//thr->curConn->dbpath; 89 | rc = mdb_get(txn,actorsdb,&key,&data); 90 | 91 | // This is new actor, assign an index 92 | if (rc == MDB_NOTFOUND) 93 | { 94 | i64 index = 0; 95 | // MDB_val key1 = {1,(void*)"?"}; 96 | #ifndef _TESTAPP_ 97 | qitem *item; 98 | db_command *cmd; 99 | 100 | item = command_create(conn->wthreadind,-1,g_pd); 101 | cmd = (db_command*)item->cmd; 102 | cmd->type = cmd_actorsdb_add; 103 | 104 | index = atomic_fetch_add_explicit(&g_pd->actorIndexes[thr->nEnv], 1, memory_order_relaxed); 105 | pWal->index = index; 106 | 107 | cmd->arg = enif_make_string(item->env,zWalName,ERL_NIF_LATIN1); 108 | cmd->arg1 = enif_make_uint64(item->env, index); 109 | push_command(conn->wthreadind, -1, g_pd, item); 110 | 111 | #else 112 | if (thr->isreadonly) 113 | { 114 | return SQLITE_ERROR; 115 | } 116 | else 117 | { 118 | char filename[MAX_PATHNAME]; 119 | sprintf(filename,"%.*s",(int)(nmLen-cutoff),zWalName+offset); 120 | index = 
atomic_fetch_add_explicit(&g_pd->actorIndexes[thr->nEnv], 1, memory_order_relaxed); 121 | pWal->index = index; 122 | if (register_actor(index, filename) != SQLITE_OK) 123 | return SQLITE_ERROR; 124 | } 125 | #endif 126 | } 127 | // Actor exists, read evnum/evterm info 128 | else if (rc == MDB_SUCCESS) 129 | { 130 | // data contains index 131 | key = data; 132 | pWal->index = *(i64*)data.mv_data; 133 | DBG("Actor at index %lld",pWal->index); 134 | rc = mdb_get(txn,infodb,&key,&data); 135 | 136 | if (rc == MDB_SUCCESS) 137 | { 138 | if (*(u8*)data.mv_data != 1) 139 | { 140 | return SQLITE_ERROR; 141 | } 142 | memcpy(&pWal->firstCompleteTerm, ((u8*)data.mv_data)+1, sizeof(u64)); 143 | memcpy(&pWal->firstCompleteEvnum,((u8*)data.mv_data)+1+sizeof(u64), sizeof(u64)); 144 | memcpy(&pWal->lastCompleteTerm, ((u8*)data.mv_data)+1+sizeof(u64)*2, sizeof(u64)); 145 | memcpy(&pWal->lastCompleteEvnum, ((u8*)data.mv_data)+1+sizeof(u64)*3, sizeof(u64)); 146 | memcpy(&pWal->inProgressTerm, ((u8*)data.mv_data)+1+sizeof(u64)*4, sizeof(u64)); 147 | memcpy(&pWal->inProgressEvnum, ((u8*)data.mv_data)+1+sizeof(u64)*5, sizeof(u64)); 148 | memcpy(&pWal->mxPage, ((u8*)data.mv_data)+1+sizeof(u64)*6, sizeof(u32)); 149 | memcpy(&pWal->allPages, ((u8*)data.mv_data)+1+sizeof(u64)*6+sizeof(u32),sizeof(u32)); 150 | pWal->readSafeTerm = pWal->lastCompleteTerm; 151 | pWal->readSafeEvnum = pWal->lastCompleteEvnum; 152 | pWal->readSafeMxPage = pWal->mxPage; 153 | 154 | // if (pWal->inProgressTerm != 0) 155 | // { 156 | // doundo(pWal,NULL,NULL,1); 157 | // } 158 | } 159 | else if (rc == MDB_NOTFOUND) 160 | { 161 | // DBG("Info for actor not found")); 162 | } 163 | } 164 | else 165 | { 166 | DBG("Error open=%d",rc); 167 | thr->forceCommit = 2; 168 | return SQLITE_ERROR; 169 | } 170 | 171 | conn->changed = 1; 172 | if (ppWal != NULL) 173 | (*ppWal) = pWal; 174 | return SQLITE_OK; 175 | } 176 | 177 | int register_actor(u64 index, char *name) 178 | { 179 | MDB_val key = {0,NULL}, data = {0, NULL}; 180 | 
int offset = 0, cutoff = 0, rc; 181 | mdbinf *mdb; 182 | size_t nmLen; 183 | u64 topIndex; 184 | db_thread *thread = g_tsd_thread; 185 | 186 | DBG("REGISTER ACTOR"); 187 | 188 | if (!g_tsd_wmdb) 189 | lock_wtxn(thread->nEnv); 190 | mdb = g_tsd_wmdb; 191 | if (!mdb) 192 | return SQLITE_ERROR; 193 | 194 | if (name[0] == '/') 195 | offset = 1; 196 | nmLen = strlen(name+offset); 197 | if (name[offset+nmLen-1] == 'l' && name[offset+nmLen-2] == 'a' && 198 | name[offset+nmLen-3] == 'w' && name[offset+nmLen-4] == '-') 199 | cutoff = 4; 200 | 201 | key.mv_size = nmLen-cutoff; 202 | key.mv_data = (void*)(name+offset); 203 | data.mv_size = sizeof(u64); 204 | data.mv_data = (void*)&index; 205 | DBG("Writing actors index for=%s",name); 206 | if ((rc = mdb_put(mdb->txn,mdb->actorsdb,&key,&data,0)) != MDB_SUCCESS) 207 | { 208 | DBG("Unable to write actor index!! %llu %d", index, rc); 209 | return SQLITE_ERROR; 210 | } 211 | 212 | key.mv_size = 1; 213 | key.mv_data = (void*)"?"; 214 | 215 | if (mdb_get(mdb->txn,mdb->actorsdb,&key,&data) == MDB_SUCCESS) 216 | memcpy(&topIndex,data.mv_data,sizeof(u64)); 217 | else 218 | topIndex = 0; 219 | 220 | index++; 221 | if (topIndex < index) 222 | { 223 | data.mv_size = sizeof(u64); 224 | data.mv_data = (void*)&index; 225 | 226 | DBG("Writing ? index %lld",index); 227 | if (mdb_put(mdb->txn,mdb->actorsdb,&key,&data,0) != MDB_SUCCESS) 228 | { 229 | DBG("Unable to write ? index!! %llu", index); 230 | return SQLITE_ERROR; 231 | } 232 | } 233 | 234 | thread->pagesChanged++; 235 | return SQLITE_OK; 236 | } 237 | 238 | 239 | int sqlite3WalClose(Wal *pWal,sqlite3* db, int sync_flags, int nBuf, u8 *zBuf) 240 | { 241 | return SQLITE_OK; 242 | } 243 | 244 | /* Set the limiting size of a WAL file. */ 245 | void sqlite3WalLimit(Wal* wal, i64 size) 246 | { 247 | } 248 | 249 | /* Used by readers to open (lock) and close (unlock) a snapshot. A 250 | ** snapshot is like a read-transaction. It is the state of the database 251 | ** at an instant in time. 
sqlite3WalOpenSnapshot gets a read lock and 252 | ** preserves the current state even if the other threads or processes 253 | ** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the 254 | ** transaction and releases the lock. 255 | */ 256 | int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged) 257 | { 258 | // db_connection* const conn = enif_tsd_get(g_tsd_conn); 259 | db_connection* conn = g_tsd_conn; 260 | *pChanged = conn->changed; 261 | DBG("Begin read trans %d",*pChanged); 262 | if (conn->changed) 263 | conn->changed = 0; 264 | return SQLITE_OK; 265 | } 266 | 267 | void sqlite3WalEndReadTransaction(Wal *pWal) 268 | { 269 | } 270 | 271 | /* Read a page from the write-ahead log, if it is present. */ 272 | int sqlite3WalFindFrame(Wal *pWal, Pgno pgno, u32 *piRead) 273 | { 274 | // db_thread * const thread = enif_tsd_get(g_tsd_thread); 275 | db_thread *thread = g_tsd_thread; 276 | if (!thread) 277 | return SQLITE_ERROR; 278 | /*if (thr->isreadonly) 279 | { 280 | u64 readSafeEvnum, readSafeTerm; 281 | #ifndef _TESTAPP_ 282 | enif_mutex_lock(pWal->mtx); 283 | #endif 284 | readSafeEvnum = pWal->rthread->readSafeEvnum; 285 | readSafeTerm = pWal->rthread->readSafeTerm; 286 | #ifndef _TESTAPP_ 287 | enif_mutex_unlock(pWal->mtx); 288 | #endif 289 | 290 | return findframe(pWal->rthread, pWal, pgno, piRead, readSafeTerm, readSafeEvnum, NULL, NULL); 291 | } 292 | else*/ if (pWal->inProgressTerm > 0 || pWal->inProgressEvnum > 0) 293 | return findframe(thread, pWal, pgno, piRead, pWal->inProgressTerm, 294 | pWal->inProgressEvnum, NULL, NULL); 295 | else 296 | return findframe(thread, pWal, pgno, piRead, pWal->lastCompleteTerm, 297 | pWal->lastCompleteEvnum, NULL, NULL); 298 | } 299 | 300 | static int findframe(db_thread *thr, Wal *pWal, Pgno pgno, u32 *piRead, u64 limitTerm, 301 | u64 limitEvnum, u64 *outTerm, u64 *outEvnum) 302 | { 303 | MDB_val key, data; 304 | int rc; 305 | size_t ndupl = 0; 306 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 307 | mdbinf 
*mdb; 308 | 309 | if (thr->pagesChanged) 310 | { 311 | mdb = g_tsd_wmdb; 312 | } 313 | else 314 | mdb = &thr->mdb; 315 | 316 | track_time(7,thr); 317 | DBG("FIND FRAME pgno=%u, index=%llu, limitterm=%llu, limitevnum=%llu", 318 | pgno,pWal->index,limitTerm,limitEvnum); 319 | 320 | // ** - Pages DB: {<>, 321 | // <>} 322 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 323 | memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 324 | key.mv_size = sizeof(pagesKeyBuf); 325 | key.mv_data = pagesKeyBuf; 326 | 327 | // u32 pgno2 = *(u32*)(key.mv_data+sizeof(u64)); 328 | // DBG("RUN %d",pgno2); 329 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_SET_KEY); 330 | if (rc == MDB_SUCCESS) 331 | { 332 | mdb_cursor_count(mdb->cursorPages,&ndupl); 333 | if (ndupl == 0) 334 | { 335 | *piRead = 0; 336 | return SQLITE_OK; 337 | } 338 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_LAST_DUP); 339 | if (rc == MDB_SUCCESS) 340 | { 341 | while (1) 342 | { 343 | char frag1 = *(char*)(((char*)data.mv_data)+sizeof(u64)*2); 344 | int frag = frag1; 345 | u64 term, evnum; 346 | 347 | memcpy(&term, data.mv_data, sizeof(u64)); 348 | memcpy(&evnum, ((u8*)data.mv_data) + sizeof(u64), sizeof(u64)); 349 | if (term > limitTerm || evnum > limitEvnum) 350 | { 351 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_PREV_DUP); 352 | if (rc == MDB_SUCCESS) { 353 | continue; 354 | } 355 | else 356 | { 357 | DBG("Cant move to prev dup, term=%llu, evnum=%llu," 358 | " limitterm=%llu, limitevnum=%llu",term,evnum,limitTerm,limitEvnum); 359 | *piRead = 0; 360 | break; 361 | } 362 | } 363 | if (outTerm != NULL) 364 | *outTerm = term; 365 | if (outEvnum != NULL) 366 | *outEvnum = evnum; 367 | 368 | DBG("Found page size=%ld, frags=%d",data.mv_size,(int)frag); 369 | thr->nResFrames = frag; 370 | thr->resFrames[frag--] = data; 371 | 372 | while (frag >= 0) 373 | { 374 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_PREV_DUP); 375 | if (rc != MDB_SUCCESS) { 376 | break; 377 | } 378 | frag = 
*(((u8*)data.mv_data)+sizeof(u64)*2); 379 | // DBG("SUCCESS? %d frag=%d, size=%ld",pgno,frag,data.mv_size); 380 | thr->resFrames[frag--] = data; 381 | } 382 | if (rc == MDB_SUCCESS) { 383 | *piRead = 1; 384 | } 385 | break; 386 | } 387 | } 388 | else 389 | { 390 | DBG("Find page no last dup"); 391 | *piRead = 0; 392 | } 393 | } 394 | else if (rc == MDB_NOTFOUND) 395 | { 396 | DBG("Frame not found!"); 397 | *piRead = 0; 398 | } 399 | else 400 | { 401 | DBG("ERROR findframe: %d",rc); 402 | *piRead = 0; 403 | } 404 | return SQLITE_OK; 405 | } 406 | 407 | static int readframe(Wal *pWal, u32 iRead, int nOut, u8 *pOut) 408 | { 409 | int result = 0; 410 | // db_thread * const thr = enif_tsd_get(g_tsd_thread); 411 | db_thread *thr = g_tsd_thread; 412 | 413 | // #ifndef _WIN32 414 | // if (pthread_equal(pthread_self(), pWal->rthreadId)) 415 | // #else 416 | // if (GetCurrentThreadId() == pWal->rthreadId) 417 | // #endif 418 | // thr = pWal->rthread; 419 | // else 420 | // thr = pWal->thread; 421 | 422 | DBG("Read frame"); 423 | // i64 term, evnum; 424 | if (thr->nResFrames == 0) 425 | { 426 | result = LZ4_decompress_safe((((char*)thr->resFrames[0].mv_data)+sizeof(u64)*2+1), 427 | (char*)pOut, 428 | thr->resFrames[0].mv_size-(sizeof(u64)*2+1), 429 | nOut); 430 | #ifdef _TESTDBG_ 431 | if (result > 0) 432 | { 433 | 434 | { 435 | i64 term, evnum; 436 | memcpy(&term, thr->resFrames[0].mv_data, sizeof(u64)); 437 | memcpy(&evnum, (u8*)thr->resFrames[0].mv_data+sizeof(u64), sizeof(u64)); 438 | DBG("Term=%lld, evnum=%lld, framesize=%d", 439 | term,evnum,(int)thr->resFrames[0].mv_size); 440 | } 441 | } 442 | #endif 443 | } 444 | else 445 | { 446 | u8 pagesBuf[PAGE_BUFF_SIZE]; 447 | int frags = thr->nResFrames; 448 | int pos = 0; 449 | 450 | while (frags >= 0) 451 | { 452 | // DBG("Read frame %d",pos); 453 | memcpy(pagesBuf + pos, 454 | ((char*)thr->resFrames[frags].mv_data)+sizeof(u64)*2+1, 455 | thr->resFrames[frags].mv_size-(sizeof(u64)*2+1)); 456 | pos += 
thr->resFrames[frags].mv_size-(sizeof(u64)*2+1); 457 | frags--; 458 | } 459 | thr->nResFrames = 0; 460 | 461 | result = LZ4_decompress_safe((char*)pagesBuf,(char*)pOut,pos,nOut); 462 | } 463 | return result; 464 | } 465 | 466 | int sqlite3WalReadFrame(Wal *pWal, u32 iRead, int nOut, u8 *pOut) 467 | { 468 | if (readframe(pWal,iRead,nOut,pOut) == SQLITE_DEFAULT_PAGE_SIZE) 469 | return SQLITE_OK; 470 | else 471 | return SQLITE_ERROR; 472 | } 473 | 474 | /* If the WAL is not empty, return the size of the database. */ 475 | Pgno sqlite3WalDbsize(Wal *pWal) 476 | { 477 | if (pWal) 478 | { 479 | DBG("Dbsize %u",pWal->mxPage); 480 | return pWal->mxPage; 481 | } 482 | DBG("Dbsize 0"); 483 | return 0; 484 | } 485 | 486 | /* Obtain or release the WRITER lock. */ 487 | int sqlite3WalBeginWriteTransaction(Wal *pWal) 488 | { 489 | return SQLITE_OK; 490 | } 491 | int sqlite3WalEndWriteTransaction(Wal *pWal) 492 | { 493 | return SQLITE_OK; 494 | } 495 | 496 | static int fillbuff(db_thread *thr, Wal *pWal, iterate_resource *iter, u8* buf, int bufsize) 497 | { 498 | int bufused = 0; 499 | int frags = thr->nResFrames; 500 | int frsize = 0; 501 | const int hsz = (sizeof(u64)*2+1); 502 | 503 | while (frags >= 0) 504 | { 505 | frsize += thr->resFrames[frags].mv_size-hsz; 506 | frags--; 507 | } 508 | 509 | frags = thr->nResFrames; 510 | frsize = 0; 511 | while (frags >= 0) 512 | { 513 | const int pagesz = thr->resFrames[frags].mv_size - hsz; 514 | 515 | memcpy(buf+bufused, (u8*)thr->resFrames[frags].mv_data + hsz, pagesz); 516 | bufused += pagesz; 517 | frags--; 518 | 519 | DBG("bufused=%d, pagesz=%d",bufused,pagesz); 520 | } 521 | thr->nResFrames = 0; 522 | 523 | return bufused; 524 | } 525 | 526 | // return number of bytes written 527 | static int wal_iterate(Wal *pWal, iterate_resource *iter, u8 *buf, int bufsize, u8 *hdr, u32 *done) 528 | { 529 | // db_thread* const thr = enif_tsd_get(g_tsd_thread); 530 | db_thread *thr = g_tsd_thread; 531 | mdbinf* const mdb = &thr->mdb; 532 | u32 
mxPage; 533 | u64 readSafeEvnum, readSafeTerm; 534 | 535 | #ifndef _TESTAPP_ 536 | enif_mutex_lock(pWal->mtx); 537 | #endif 538 | readSafeEvnum = pWal->lastCompleteEvnum; 539 | readSafeTerm = pWal->lastCompleteTerm; 540 | mxPage = pWal->mxPage; 541 | #ifndef _TESTAPP_ 542 | enif_mutex_unlock(pWal->mtx); 543 | #endif 544 | 545 | if (!iter->started) 546 | { 547 | if (iter->evnum + iter->evterm == 0) 548 | { 549 | // If any writes come after iterator started, we must ignore those pages. 550 | iter->evnum = readSafeEvnum; 551 | iter->evterm = readSafeTerm; 552 | iter->pgnoPos = 1; 553 | iter->entiredb = 1; 554 | iter->mxPage = mxPage; 555 | if (pWal->mxPage == 0) 556 | { 557 | DBG("ERROR: Iterate on empty DB %llu",pWal->lastCompleteEvnum); 558 | *done = 1; 559 | return 0; 560 | } 561 | } 562 | else 563 | { 564 | // set mxPage to highest pgno we find. 565 | iter->pgnoPos = iter->mxPage = 0; 566 | DBG("Iterate rsterm=%llu rsevnum=%llu",readSafeTerm,readSafeEvnum); 567 | } 568 | iter->started = 1; 569 | } 570 | 571 | // send entire db (without history) 572 | if (iter->entiredb) 573 | { 574 | u32 iRead = 0; 575 | findframe(thr, pWal, iter->pgnoPos, &iRead, iter->evterm, iter->evnum, NULL, NULL); 576 | 577 | if (!iRead) 578 | { 579 | DBG("Iterate did not find page"); 580 | *done = iter->mxPage; 581 | return 0; 582 | } 583 | DBG("Iter pos=%u, mx=%u, safemx=%u",iter->pgnoPos, iter->mxPage, mxPage); 584 | if (iter->pgnoPos == iter->mxPage) 585 | *done = iter->mxPage; 586 | put8byte(hdr, iter->evterm); 587 | put8byte(hdr+sizeof(u64), iter->evnum); 588 | put4byte(hdr+sizeof(u64)*2, iter->pgnoPos); 589 | put4byte(hdr+sizeof(u64)*2+sizeof(u32), *done); 590 | iter->pgnoPos++; 591 | return fillbuff(thr, pWal, iter, buf, bufsize); 592 | } 593 | else 594 | { 595 | MDB_val logKey, logVal; 596 | int logop; 597 | u8 logKeyBuf[sizeof(u64)*3]; 598 | int rc; 599 | // ** - Log DB: {<>, <>} 600 | 601 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 602 | memcpy(logKeyBuf + sizeof(u64), 
&iter->evterm, sizeof(u64)); 603 | memcpy(logKeyBuf + sizeof(u64)*2, &iter->evnum, sizeof(u64)); 604 | logKey.mv_data = logKeyBuf; 605 | logKey.mv_size = sizeof(logKeyBuf); 606 | DBG("iterate looking for, matchterm=%llu matchevnum=%llu",iter->evterm,iter->evnum); 607 | if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS) 608 | { 609 | // Evterm/evnum combination not found. Check if evnum is there. 610 | // If so return evterm. It will mean a node is in conflict. 611 | DBG("Key not found in log"); 612 | if (readSafeEvnum == iter->evnum) 613 | { 614 | iter->evterm = readSafeTerm; 615 | iter->termMismatch = 1; 616 | } 617 | else 618 | { 619 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 620 | memcpy(logKeyBuf + sizeof(u64), &readSafeTerm, sizeof(u64)); 621 | memcpy(logKeyBuf + sizeof(u64)*2, &readSafeEvnum,sizeof(u64)); 622 | if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS) 623 | { 624 | DBG("Key not found in log for undo"); 625 | *done = 1; 626 | return 0; 627 | } 628 | while (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_PREV_NODUP) == MDB_SUCCESS) 629 | { 630 | u64 aindex, term, evnum; 631 | 632 | mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT); 633 | memcpy(&aindex, logKey.mv_data, sizeof(u64)); 634 | memcpy(&term, (u8*)logKey.mv_data+sizeof(u64), sizeof(u64)); 635 | memcpy(&evnum, (u8*)logKey.mv_data+sizeof(u64)*2,sizeof(u64)); 636 | 637 | DBG("Iterate on term=%llu, evnum=%llu, looking for=%llu",term,evnum,iter->evnum); 638 | 639 | if (aindex != pWal->index) 640 | break; 641 | if (iter->evnum == evnum) 642 | { 643 | iter->evterm = term; 644 | iter->termMismatch = 1; 645 | break; 646 | } 647 | } 648 | } 649 | 650 | *done = 1; 651 | return 0; 652 | } 653 | 654 | // We start iterate from next evnum not current. Input evterm/evnum is match_index and match_term. 655 | // It needs next. 
656 | if (iter->started == 1 && 657 | (rc = mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_NEXT_NODUP)) != MDB_SUCCESS) 658 | { 659 | *done = 1; 660 | return 0; 661 | } 662 | else 663 | { 664 | u64 aindex; 665 | 666 | rc = mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT); 667 | if (rc != MDB_SUCCESS) 668 | { 669 | *done = 1; 670 | return 0; 671 | } 672 | memcpy(&aindex, (u8*)logKey.mv_data, sizeof(u64)); 673 | memcpy(&iter->evterm, (u8*)logKey.mv_data+sizeof(u64), sizeof(u64)); 674 | memcpy(&iter->evnum, (u8*)logKey.mv_data+sizeof(u64)*2,sizeof(u64)); 675 | if (aindex != pWal->index) 676 | { 677 | *done = 1; 678 | return 0; 679 | } 680 | // To keep from moving iter->evterm/iter->evnum forward more than once. 681 | iter->started = 2; 682 | } 683 | 684 | logop = MDB_FIRST_DUP; 685 | while ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS) 686 | { 687 | u64 evnum,evterm; 688 | u32 pgno; 689 | u32 iRead; 690 | 691 | logop = MDB_NEXT_DUP; 692 | 693 | mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT); 694 | memcpy(&pgno,logVal.mv_data,sizeof(u32)); 695 | 696 | DBG("iterate at pgno=%u, pgnopos=%u",pgno,iter->pgnoPos); 697 | 698 | if (pgno <= iter->pgnoPos) 699 | continue; 700 | 701 | findframe(thr, pWal, pgno, &iRead, iter->evterm, iter->evnum, &evterm, &evnum); 702 | 703 | if (iRead == 0) 704 | { 705 | DBG("ERROR: Did not find frame for pgno=%u, evterm=%llu, evnum=%llu", 706 | pgno, iter->evterm, iter->evnum); 707 | *done = 1; 708 | return 0; 709 | } 710 | 711 | if (evterm != iter->evterm || evnum != iter->evnum) 712 | { 713 | DBG("ERROR: Evterm/evnum does not match,looking for: evterm=%llu, evnum=%llu, " 714 | "got: evterm=%llu, evnum=%llu", iter->evterm, iter->evnum, evterm, evnum); 715 | *done = 1; 716 | return 0; 717 | } 718 | 719 | iter->pgnoPos = pgno; 720 | if ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS) 721 | *done = 0; 722 | else 723 | *done = iter->pgnoPos; 724 | DBG( 
"logcursor get next %d, done=%u",rc,*done); 725 | put8byte(hdr, iter->evterm); 726 | put8byte(hdr+sizeof(u64), iter->evnum); 727 | put4byte(hdr+sizeof(u64)*2, iter->pgnoPos); 728 | put4byte(hdr+sizeof(u64)*2+sizeof(u32), *done); 729 | return fillbuff(thr, pWal, iter, buf, bufsize); 730 | } 731 | *done = 1; 732 | return 0; 733 | } 734 | } 735 | 736 | // Delete all pages up to limitEvterm and limitEvnum 737 | static int checkpoint(Wal *pWal, u64 limitEvnum) 738 | { 739 | MDB_val logKey, logVal; 740 | u8 logKeyBuf[sizeof(u64)*3]; 741 | u64 evnum,evterm,aindex; 742 | mdbinf* mdb; 743 | 744 | // db_thread* const thr = enif_tsd_get(g_tsd_thread); 745 | db_thread *thr = g_tsd_thread; 746 | int logop, mrc = MDB_SUCCESS; 747 | // u8 somethingDeleted = 0; 748 | int allPagesDiff = 0; 749 | 750 | if (!thr) 751 | return SQLITE_ERROR; 752 | if (!g_tsd_wmdb) 753 | lock_wtxn(thr->nEnv); 754 | mdb = g_tsd_wmdb; 755 | if (!mdb) 756 | return SQLITE_ERROR; 757 | 758 | // if (pWal->inProgressTerm == 0) 759 | // return SQLITE_OK; 760 | 761 | DBG("checkpoint actor=%llu, fct=%llu, fcev=%llu, limitEvnum=%llu",pWal->index, 762 | pWal->firstCompleteTerm,pWal->firstCompleteEvnum,limitEvnum); 763 | 764 | while (pWal->firstCompleteEvnum < limitEvnum) 765 | { 766 | logKey.mv_data = logKeyBuf; 767 | logKey.mv_size = sizeof(logKeyBuf); 768 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 769 | memcpy(logKeyBuf + sizeof(u64), &pWal->firstCompleteTerm, sizeof(u64)); 770 | memcpy(logKeyBuf + sizeof(u64)*2, &pWal->firstCompleteEvnum,sizeof(u64)); 771 | if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS) 772 | { 773 | DBG("Key not found in log for checkpoint %llu %llu\r\n", 774 | pWal->firstCompleteTerm, pWal->firstCompleteEvnum); 775 | 776 | mrc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_FIRST); 777 | if (mrc != MDB_SUCCESS) 778 | return SQLITE_ERROR; 779 | while (aindex != pWal->index) 780 | { 781 | mrc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_NEXT); 782 | if 
(mrc != MDB_SUCCESS) 783 | return SQLITE_ERROR; 784 | aindex = *(u64*)(logKey.mv_data); 785 | } 786 | if (aindex != pWal->index) 787 | return SQLITE_ERROR; 788 | 789 | memcpy(&evterm, (u8*)logKey.mv_data + sizeof(u64), sizeof(u64)); 790 | memcpy(&evnum, (u8*)logKey.mv_data + sizeof(u64)*2, sizeof(u64)); 791 | pWal->firstCompleteTerm = evterm; 792 | pWal->firstCompleteEvnum = evnum; 793 | } 794 | 795 | DBG("checkpoint evnum=%llu",pWal->firstCompleteEvnum); 796 | // For every page here 797 | // ** - Log DB: {<>, <>} 798 | // Delete from 799 | // ** - Pages DB: {<>, <>} 800 | logop = MDB_FIRST_DUP; 801 | while ((mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS) 802 | { 803 | u32 pgno; 804 | size_t ndupl; 805 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 806 | MDB_val pgKey = {0,NULL}, pgVal = {0,NULL}; 807 | u64 pgnoLimitEvnum; 808 | 809 | logop = MDB_NEXT_DUP; 810 | memcpy(&pgno, logVal.mv_data,sizeof(u32)); 811 | 812 | DBG("checkpoint pgno=%u",pgno); 813 | 814 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 815 | memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 816 | pgKey.mv_data = pagesKeyBuf; 817 | pgKey.mv_size = sizeof(pagesKeyBuf); 818 | 819 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_SET) != MDB_SUCCESS) 820 | { 821 | continue; 822 | } 823 | mdb_cursor_count(mdb->cursorPages,&ndupl); 824 | 825 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_LAST_DUP) == MDB_SUCCESS) 826 | memcpy(&pgnoLimitEvnum, (u8*)pgVal.mv_data+sizeof(u64),sizeof(u64)); 827 | else 828 | continue; 829 | 830 | pgnoLimitEvnum = pgnoLimitEvnum < limitEvnum ? 
pgnoLimitEvnum : limitEvnum; 831 | 832 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_FIRST_DUP) != MDB_SUCCESS) 833 | continue; 834 | 835 | do 836 | { 837 | u8 frag; 838 | MDB_val pgDelKey = {0,NULL}, pgDelVal = {0,NULL}; 839 | 840 | mdb_cursor_get(mdb->cursorPages,&pgDelKey,&pgDelVal,MDB_GET_CURRENT); 841 | 842 | frag = *((u8*)pgDelVal.mv_data+sizeof(u64)*2); 843 | memcpy(&evterm, pgDelVal.mv_data, sizeof(u64)); 844 | memcpy(&evnum, (u8*)pgDelVal.mv_data+sizeof(u64),sizeof(u64)); 845 | DBG("limit limitevnum %lld, curnum %lld, dupl %zu, frag=%d", 846 | limitEvnum, evnum, ndupl,(int)frag); 847 | 848 | if (evnum < pgnoLimitEvnum) 849 | { 850 | mrc = mdb_cursor_del(mdb->cursorPages,0); 851 | if (mrc != MDB_SUCCESS) 852 | { 853 | DBG("Unable to delete page on cursor!."); 854 | break; 855 | } 856 | // else 857 | // { 858 | // DBG("Deleted page!"); 859 | // // somethingDeleted = 1; 860 | // } 861 | if (frag == 0) 862 | allPagesDiff++; 863 | } 864 | 865 | ndupl--; 866 | if (!ndupl) 867 | break; 868 | mrc = mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_NEXT_DUP); 869 | } while (mrc == MDB_SUCCESS); 870 | } 871 | 872 | mrc = mdb_cursor_del(mdb->cursorLog,MDB_NODUPDATA); 873 | if (mrc != MDB_SUCCESS) 874 | { 875 | DBG("Can not delete log"); 876 | break; 877 | } 878 | 879 | // move forward 880 | if ((mrc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_NEXT_NODUP)) != MDB_SUCCESS) 881 | { 882 | DBG("Unable to move to next log %d",mrc); 883 | break; 884 | } 885 | 886 | // read next key data 887 | memcpy(&aindex, logKey.mv_data, sizeof(u64)); 888 | memcpy(&evterm, (u8*)logKey.mv_data + sizeof(u64), sizeof(u64)); 889 | memcpy(&evnum, (u8*)logKey.mv_data + sizeof(u64)*2, sizeof(u64)); 890 | 891 | if (aindex != pWal->index) 892 | { 893 | DBG("Reached another actor"); 894 | break; 895 | } 896 | pWal->firstCompleteTerm = evterm; 897 | pWal->firstCompleteEvnum = evnum; 898 | pWal->allPages -= allPagesDiff; 899 | allPagesDiff = 0; 900 | DBG("Checkpint fce 
now=%lld",(u64)evnum); 901 | } 902 | 903 | // no dirty pages, but will write info 904 | if (sqlite3WalFrames(pWal, SQLITE_DEFAULT_PAGE_SIZE, NULL, pWal->mxPage, 1, 0) == SQLITE_OK) 905 | return SQLITE_OK; 906 | else 907 | return SQLITE_ERROR; 908 | } 909 | 910 | 911 | static int doundo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx, u8 delPages) 912 | { 913 | MDB_val logKey, logVal; 914 | MDB_val pgKey, pgVal; 915 | u8 logKeyBuf[sizeof(u64)*3]; 916 | int logop, pgop, rc, mrc; 917 | mdbinf *mdb; 918 | 919 | // db_thread* const thr = enif_tsd_get(g_tsd_thread); 920 | db_thread *thr = g_tsd_thread; 921 | rc = SQLITE_OK; 922 | if (!thr) 923 | return SQLITE_OK; 924 | 925 | if (pWal->inProgressTerm == 0) 926 | return SQLITE_OK; 927 | 928 | if (!g_tsd_wmdb) 929 | lock_wtxn(thr->nEnv); 930 | mdb = g_tsd_wmdb; 931 | if (!mdb) 932 | return SQLITE_ERROR; 933 | 934 | logKey.mv_data = logKeyBuf; 935 | logKey.mv_size = sizeof(logKeyBuf); 936 | 937 | DBG("Undo"); 938 | 939 | // For every page here 940 | // ** - Log DB: {<>, <>} 941 | // Delete from 942 | // ** - Pages DB: {<>, <>} 943 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 944 | memcpy(logKeyBuf + sizeof(u64), &pWal->inProgressTerm, sizeof(u64)); 945 | memcpy(logKeyBuf + sizeof(u64)*2, &pWal->inProgressEvnum,sizeof(u64)); 946 | 947 | if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS) 948 | { 949 | DBG("Key not found in log for undo, index=%llu, term=%llu, evnum=%llu", 950 | pWal->index, pWal->inProgressTerm, pWal->inProgressEvnum); 951 | return SQLITE_OK; 952 | } 953 | logop = MDB_FIRST_DUP; 954 | while ((mrc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS) 955 | { 956 | u32 pgno; 957 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 958 | u64 term,evnum; 959 | 960 | memcpy(&pgno,logVal.mv_data,sizeof(u32)); 961 | 962 | if (delPages) 963 | { 964 | size_t ndupl; 965 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 966 | memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 
967 | pgKey.mv_data = pagesKeyBuf; 968 | pgKey.mv_size = sizeof(pagesKeyBuf); 969 | 970 | DBG("UNDO pgno=%d",pgno); 971 | 972 | pgop = MDB_FIRST_DUP; 973 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_SET) != MDB_SUCCESS) 974 | { 975 | logop = MDB_NEXT_DUP; 976 | DBG("Key not found in log for undo"); 977 | continue; 978 | } 979 | mdb_cursor_count(mdb->cursorPages,&ndupl); 980 | while (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,pgop) == MDB_SUCCESS) 981 | { 982 | u8 frag = *((u8*)pgVal.mv_data+sizeof(u64)*2); 983 | memcpy(&term, pgVal.mv_data, sizeof(u64)); 984 | memcpy(&evnum,(u8*)pgVal.mv_data+sizeof(u64),sizeof(u64)); 985 | DBG("progress term %lld, progress evnum %lld, curterm %lld, curnum %lld", 986 | pWal->inProgressTerm, pWal->inProgressEvnum, term, evnum); 987 | if (term >= pWal->inProgressTerm && evnum >= pWal->inProgressEvnum) 988 | { 989 | if (mdb_cursor_del(mdb->cursorPages,0) != MDB_SUCCESS) 990 | { 991 | DBG("Can not delete undo"); 992 | rc = SQLITE_ERROR; 993 | break; 994 | } 995 | if (frag == 0) 996 | pWal->allPages--; 997 | ndupl--; 998 | if (!ndupl) 999 | break; 1000 | } 1001 | 1002 | pgop = MDB_NEXT_DUP; 1003 | } 1004 | pWal->inProgressTerm = pWal->inProgressEvnum = 0; 1005 | storeinfo(pWal,0,0,NULL); 1006 | thr->pagesChanged++; 1007 | } 1008 | 1009 | if (xUndo) 1010 | rc = xUndo(pUndoCtx, pgno); 1011 | 1012 | logop = MDB_NEXT_DUP; 1013 | } 1014 | // if (mdb_cursor_del(mdb->cursorLog,MDB_NODUPDATA) != MDB_SUCCESS) 1015 | // { 1016 | // DBG("Unable to cleanup key from logdb")); 1017 | // } 1018 | 1019 | DBG("Undo done!"); 1020 | 1021 | return rc; 1022 | } 1023 | 1024 | /* Undo any frames written (but not committed) to the log */ 1025 | int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx) 1026 | { 1027 | DBG("sqlite3WalUndo"); 1028 | return doundo(pWal,xUndo, pUndoCtx,1); 1029 | } 1030 | 1031 | /* Return an integer that records the current (uncommitted) write 1032 | ** position in the WAL */ 1033 | void 
sqlite3WalSavepoint(Wal *pWal, u32 *aWalData) 1034 | { 1035 | } 1036 | 1037 | /* Move the write position of the WAL back to iFrame. Called in 1038 | ** response to a ROLLBACK TO command. */ 1039 | int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData) 1040 | { 1041 | return SQLITE_OK; 1042 | } 1043 | 1044 | static int wal_rewind_int(mdbinf *mdb, Wal *pWal, u64 limitEvnum) 1045 | { 1046 | MDB_val logKey, logVal; 1047 | u8 logKeyBuf[sizeof(u64)*3]; 1048 | int allPagesDiff = 0; 1049 | int logop, rc; 1050 | u64 evnum,evterm,aindex; 1051 | 1052 | // u8 somethingDeleted = 0; 1053 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 1054 | memcpy(logKeyBuf + sizeof(u64), &pWal->lastCompleteTerm, sizeof(u64)); 1055 | memcpy(logKeyBuf + sizeof(u64)*2, &pWal->lastCompleteEvnum,sizeof(u64)); 1056 | 1057 | logKey.mv_data = logKeyBuf; 1058 | logKey.mv_size = sizeof(logKeyBuf); 1059 | 1060 | if ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET)) != MDB_SUCCESS) 1061 | { 1062 | DBG("Key not found in log for rewind %llu %llu", 1063 | pWal->lastCompleteTerm,pWal->lastCompleteEvnum); 1064 | return SQLITE_OK; 1065 | } 1066 | 1067 | while (pWal->lastCompleteEvnum >= limitEvnum) 1068 | { 1069 | // mdb_cursor_count(thr->cursorLog,&ndupl); 1070 | // For every page here 1071 | // ** - Log DB: {<>, <>} 1072 | // Delete from 1073 | // ** - Pages DB: {<>, <>} 1074 | logop = MDB_LAST_DUP; 1075 | while ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS) 1076 | { 1077 | u32 pgno; 1078 | // u8 rewrite = 0; 1079 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 1080 | MDB_val pgKey, pgVal; 1081 | size_t ndupl; 1082 | // size_t rewritePos = 0; 1083 | int pgop; 1084 | 1085 | memcpy(&pgno, logVal.mv_data,sizeof(u32)); 1086 | DBG("Moving to pgno=%u, evnum=%llu",pgno,pWal->lastCompleteEvnum); 1087 | 1088 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 1089 | memcpy(pagesKeyBuf + sizeof(u64), &pgno, sizeof(u32)); 1090 | pgKey.mv_data = pagesKeyBuf; 1091 | pgKey.mv_size = 
sizeof(pagesKeyBuf); 1092 | 1093 | logop = MDB_PREV_DUP; 1094 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_SET) != MDB_SUCCESS) 1095 | { 1096 | continue; 1097 | } 1098 | mdb_cursor_count(mdb->cursorPages,&ndupl); 1099 | if (ndupl == 0) 1100 | continue; 1101 | if (mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,MDB_LAST_DUP) != MDB_SUCCESS) 1102 | continue; 1103 | pgop = MDB_PREV_DUP; 1104 | do 1105 | { 1106 | u8 frag; 1107 | MDB_val pgDelKey = {0,NULL}, pgDelVal = {0,NULL}; 1108 | 1109 | if (mdb_cursor_get(mdb->cursorPages,&pgDelKey,&pgDelVal,MDB_GET_CURRENT) != MDB_SUCCESS) 1110 | break; 1111 | frag = *((u8*)pgDelVal.mv_data+sizeof(u64)*2); 1112 | memcpy(&evnum, (u8*)pgDelVal.mv_data+sizeof(u64),sizeof(u64)); 1113 | DBG("Deleting pgno=%u, evnum=%llu",pgno,evnum); 1114 | if (evnum >= limitEvnum) 1115 | { 1116 | // Like checkpoint, we can not trust this will succeed. 1117 | rc = mdb_cursor_del(mdb->cursorPages,0); 1118 | if (rc != MDB_SUCCESS) 1119 | { 1120 | DBG("Unable to delete rewind page!!!"); 1121 | break; 1122 | } 1123 | // else 1124 | // { 1125 | // // This is normal operation. Delete page and set flag 1126 | // // that something is deleted. 1127 | // DBG("Rewind page deleted!"); 1128 | // somethingDeleted = 1; 1129 | // } 1130 | if (frag == 0) 1131 | allPagesDiff++; 1132 | } 1133 | else 1134 | { 1135 | // No ugliness happened. Either there is nothing to delete or 1136 | // we deleted a few pages off the top and we are done. 1137 | break; 1138 | } 1139 | ndupl--; 1140 | if (!ndupl) 1141 | break; 1142 | rc = mdb_cursor_get(mdb->cursorPages,&pgKey,&pgVal,pgop); 1143 | } while (rc == MDB_SUCCESS); 1144 | DBG("Done looping pages %d",rc); 1145 | // If we moved through all pages and rewrite did not happen 1146 | // and this is last page, we have shrunk DB. 
1147 | if (!ndupl && pgno == pWal->mxPage) 1148 | pWal->mxPage--; 1149 | } 1150 | if (mdb_cursor_del(mdb->cursorLog,MDB_NODUPDATA) != MDB_SUCCESS) 1151 | { 1152 | DBG("Rewind Unable to cleanup key from logdb"); 1153 | } 1154 | if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_PREV_NODUP) != MDB_SUCCESS) 1155 | { 1156 | DBG("Rewind Unable to move to next log"); 1157 | break; 1158 | } 1159 | memcpy(&aindex, logKey.mv_data, sizeof(u64)); 1160 | memcpy(&evterm, (u8*)logKey.mv_data + sizeof(u64), sizeof(u64)); 1161 | memcpy(&evnum, (u8*)logKey.mv_data + sizeof(u64)*2, sizeof(u64)); 1162 | 1163 | if (aindex != pWal->index) 1164 | { 1165 | DBG("Rewind Reached another actor=%llu, me=%llu",aindex,pWal->index); 1166 | break; 1167 | } 1168 | pWal->lastCompleteTerm = evterm; 1169 | pWal->lastCompleteEvnum = evnum; 1170 | pWal->allPages -= allPagesDiff; 1171 | allPagesDiff = 0; 1172 | } 1173 | return SQLITE_OK; 1174 | } 1175 | 1176 | static int storeinfo(Wal *pWal, u64 currentTerm, u8 votedForSize, u8 *votedFor) 1177 | { 1178 | MDB_val key = {0,NULL}, data = {0,NULL}; 1179 | int rc; 1180 | db_thread *thr = g_tsd_thread; 1181 | mdbinf* mdb; 1182 | 1183 | if (!g_tsd_wmdb) 1184 | lock_wtxn(thr->nEnv); 1185 | mdb = g_tsd_wmdb; 1186 | if (!mdb) 1187 | return SQLITE_ERROR; 1188 | 1189 | key.mv_size = sizeof(u64); 1190 | key.mv_data = &pWal->index; 1191 | if (votedFor == NULL) 1192 | { 1193 | rc = mdb_cursor_get(mdb->cursorInfo,&key,&data,MDB_SET_KEY); 1194 | 1195 | if (rc == MDB_SUCCESS && data.mv_size >= (1+sizeof(u64)*6+sizeof(u32)*2+sizeof(u64)+1)) 1196 | { 1197 | memcpy(¤tTerm, (u8*)data.mv_data+1+sizeof(u64)*6+sizeof(u32)*2, sizeof(u64)); 1198 | votedForSize = (u8)((u8*)data.mv_data)[1+sizeof(u64)*6+sizeof(u32)*2+sizeof(u64)]; 1199 | //votedFor = data.mv_data+1+sizeof(u64)*6+sizeof(u32)*2+sizeof(u64)+1; 1200 | votedFor = alloca(votedForSize); 1201 | memcpy(votedFor,(u8*)data.mv_data+1+sizeof(u64)*6+sizeof(u32)*2+sizeof(u64)+1, votedForSize); 1202 | // DBG("Voted for 
%.*s",(int)votedForSize,(char*)votedFor)); 1203 | } 1204 | } 1205 | key.mv_size = sizeof(u64); 1206 | key.mv_data = &pWal->index; 1207 | data.mv_data = NULL; 1208 | data.mv_size = 1+sizeof(u64)*6+sizeof(u32)*2+sizeof(u64)+1+votedForSize; 1209 | rc = mdb_cursor_put(mdb->cursorInfo,&key,&data,MDB_RESERVE); 1210 | if (rc == MDB_SUCCESS) 1211 | { 1212 | u8 *infoBuf = data.mv_data; 1213 | infoBuf[0] = 1; 1214 | memcpy(infoBuf+1, &pWal->firstCompleteTerm,sizeof(u64)); 1215 | memcpy(infoBuf+1+sizeof(u64), &pWal->firstCompleteEvnum,sizeof(u64)); 1216 | memcpy(infoBuf+1+sizeof(u64)*2, &pWal->lastCompleteTerm, sizeof(u64)); 1217 | memcpy(infoBuf+1+sizeof(u64)*3, &pWal->lastCompleteEvnum,sizeof(u64)); 1218 | memcpy(infoBuf+1+sizeof(u64)*4, &pWal->inProgressTerm, sizeof(u64)); 1219 | memcpy(infoBuf+1+sizeof(u64)*5, &pWal->inProgressEvnum, sizeof(u64)); 1220 | memcpy(infoBuf+1+sizeof(u64)*6, &pWal->mxPage, sizeof(u32)); 1221 | memcpy(infoBuf+1+sizeof(u64)*6+sizeof(u32), &pWal->allPages,sizeof(u32)); 1222 | memcpy(infoBuf+1+sizeof(u64)*6+sizeof(u32)*2, ¤tTerm, sizeof(u64)); 1223 | infoBuf[1+sizeof(u64)*7+sizeof(u32)*2] = votedForSize; 1224 | memcpy(infoBuf+2+sizeof(u64)*7+sizeof(u32)*2, votedFor, votedForSize); 1225 | thr->forceCommit = 1; 1226 | return SQLITE_OK; 1227 | } 1228 | return SQLITE_ERROR; 1229 | } 1230 | 1231 | /* Write a frame or frames to the log. 
*/ 1232 | int sqlite3WalFrames(Wal *pWal, int szPage, PgHdr *pList, Pgno nTruncate, int isCommit, int sync_flags) 1233 | { 1234 | PgHdr *p; 1235 | MDB_val key, data; 1236 | int rc; 1237 | mdbinf* mdb; 1238 | MDB_txn* txn; 1239 | db_thread *thr = g_tsd_thread; 1240 | db_connection* pCon = g_tsd_conn; 1241 | 1242 | DBG("sqlite3WalFrames"); 1243 | 1244 | if (!thr) 1245 | return SQLITE_ERROR; 1246 | if (!g_tsd_wmdb) 1247 | lock_wtxn(thr->nEnv); 1248 | mdb = g_tsd_wmdb; 1249 | txn = mdb->txn; 1250 | 1251 | if (!mdb) 1252 | return SQLITE_ERROR; 1253 | 1254 | key.mv_size = sizeof(u64); 1255 | key.mv_data = (void*)&pWal->index; 1256 | 1257 | // Term/evnum must always be increasing 1258 | while ((pWal->inProgressTerm > 0 && pWal->inProgressTerm < pWal->lastCompleteTerm) || 1259 | (pWal->inProgressEvnum > 0 && pWal->inProgressEvnum < pWal->lastCompleteEvnum)) 1260 | { 1261 | u64 preTerm = pWal->lastCompleteTerm; 1262 | u64 preNum = pWal->lastCompleteEvnum; 1263 | DBG("Must do rewind. inprog_term=%llu, inprog_evnum=%llu, lc_term=%llu, lc_evnum=%llu", 1264 | pWal->inProgressTerm, pWal->inProgressEvnum, pWal->lastCompleteTerm, pWal->lastCompleteEvnum); 1265 | 1266 | wal_rewind_int(mdb,pWal,pWal->lastCompleteEvnum); 1267 | if (preTerm == pWal->lastCompleteTerm && preNum == pWal->lastCompleteEvnum) 1268 | break; 1269 | } 1270 | 1271 | track_time(2,thr); 1272 | // ** - Pages DB: {<>, <>} 1273 | for(p=pList; p; p=p->pDirty) 1274 | { 1275 | u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)]; 1276 | u8 pagesBuf[PAGE_BUFF_SIZE]; 1277 | int full_size = 0; 1278 | int page_size = LZ4_compress_default((char*)p->pData,(char*)pagesBuf+sizeof(u64)*2+1,szPage,sizeof(pagesBuf)); 1279 | char fragment_index = 0; 1280 | int skipped = 0; 1281 | track_time(3,thr); 1282 | 1283 | DBG("Insert frame, actor=%lld, pgno=%u, " 1284 | "term=%lld, evnum=%lld, commit=%d, truncate=%d, compressedsize=%d", 1285 | pWal->index,p->pgno,pWal->inProgressTerm,pWal->inProgressEvnum, 1286 | isCommit,nTruncate,page_size); 1287 | 
1288 | if (pCon->doReplicate) 1289 | { 1290 | u8 hdr[sizeof(u64)*2+sizeof(u32)*2]; 1291 | put8byte(hdr, pWal->inProgressTerm); 1292 | put8byte(hdr+sizeof(u64), pWal->inProgressEvnum); 1293 | put4byte(hdr+sizeof(u64)*2, p->pgno); 1294 | if (p->pDirty) 1295 | put4byte(hdr+sizeof(u64)*2+sizeof(u32), 0); 1296 | else 1297 | put4byte(hdr+sizeof(u64)*2+sizeof(u32), nTruncate); 1298 | #ifndef _TESTAPP_ 1299 | wal_page_hook(thr,pagesBuf+sizeof(u64)*2+1, page_size, hdr, sizeof(hdr)); 1300 | #endif 1301 | } 1302 | 1303 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 1304 | memcpy(pagesKeyBuf + sizeof(u64), &p->pgno, sizeof(u32)); 1305 | key.mv_size = sizeof(pagesKeyBuf); 1306 | key.mv_data = pagesKeyBuf; 1307 | 1308 | 1309 | // Check if there are pages with the same or higher evnum/evterm. If there are, delete them. 1310 | // This can happen if sqlite flushed some page to disk before commiting, because there were 1311 | // so many pages that they could not be held in memory. Or it could happen if pages need to be 1312 | // overwritten because there was a write that did not pass raft consensus. 1313 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_SET_KEY); 1314 | if (rc == MDB_SUCCESS) 1315 | { 1316 | size_t ndupl; 1317 | mdb_cursor_count(mdb->cursorPages,&ndupl); 1318 | 1319 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_LAST_DUP); 1320 | if (rc == MDB_SUCCESS) 1321 | { 1322 | MDB_val pgDelKey = {0,NULL}, pgDelVal = {0,NULL}; 1323 | u64 evnum, evterm; 1324 | u8 frag = *((u8*)data.mv_data+sizeof(u64)*2); 1325 | memcpy(&evterm, data.mv_data, sizeof(u64)); 1326 | memcpy(&evnum, (u8*)data.mv_data + sizeof(u64), sizeof(u64)); 1327 | 1328 | while ((evterm > pWal->inProgressTerm || evnum >= pWal->inProgressEvnum)) 1329 | //(pWal->inProgressTerm + pWal->inProgressEvnum) > 0) 1330 | { 1331 | DBG("Deleting pages higher or equal to current. 
" 1332 | "Evterm=%llu, evnum=%llu, curterm=%llu, curevn=%llu, dupl=%ld", 1333 | evterm,evnum,pWal->inProgressTerm,pWal->inProgressEvnum,ndupl); 1334 | 1335 | if (pgDelKey.mv_data != NULL) 1336 | { 1337 | if ((rc = mdb_del(txn,mdb->pagesdb,&pgDelKey,&pgDelVal)) != MDB_SUCCESS) 1338 | { 1339 | DBG("Unable to cleanup page from pagedb %d",rc); 1340 | break; 1341 | } 1342 | pgDelKey.mv_data = NULL; 1343 | } 1344 | mdb_cursor_get(mdb->cursorPages,&pgDelKey,&pgDelVal,MDB_GET_CURRENT); 1345 | 1346 | // if (mdb_cursor_del(mdb->cursorPages,0) != MDB_SUCCESS) 1347 | // { 1348 | // DBG("Cant delete!"); 1349 | // break; 1350 | // } 1351 | 1352 | if (frag == 0) 1353 | pWal->allPages--; 1354 | ndupl--; 1355 | if (!ndupl) 1356 | break; 1357 | rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_PREV_DUP); 1358 | if (rc != MDB_SUCCESS) 1359 | break; 1360 | memcpy(&evterm, data.mv_data, sizeof(u64)); 1361 | memcpy(&evnum, (u8*)data.mv_data + sizeof(u64), sizeof(u64)); 1362 | frag = *((u8*)data.mv_data+sizeof(u64)*2); 1363 | } 1364 | if (pgDelKey.mv_data != NULL) 1365 | { 1366 | if ((rc = mdb_del(txn,mdb->pagesdb,&pgDelKey,&pgDelVal)) != MDB_SUCCESS) 1367 | { 1368 | DBG("Unable to cleanup page from pagedb %d",rc); 1369 | break; 1370 | } 1371 | pgDelKey.mv_data = NULL; 1372 | } 1373 | } 1374 | memcpy(pagesKeyBuf, &pWal->index,sizeof(u64)); 1375 | memcpy(pagesKeyBuf + sizeof(u64), &p->pgno, sizeof(u32)); 1376 | key.mv_size = sizeof(pagesKeyBuf); 1377 | key.mv_data = pagesKeyBuf; 1378 | } 1379 | track_time(4,thr); 1380 | 1381 | memcpy(pagesBuf, &pWal->inProgressTerm, sizeof(u64)); 1382 | memcpy(pagesBuf + sizeof(u64), &pWal->inProgressEvnum, sizeof(u64)); 1383 | 1384 | full_size = page_size + sizeof(u64)*2 + 1; 1385 | if (full_size < thr->maxvalsize) 1386 | fragment_index = 0; 1387 | else 1388 | { 1389 | full_size = page_size; 1390 | skipped = thr->maxvalsize - sizeof(u64)*2 - 1; 1391 | full_size -= skipped; 1392 | while(full_size > 0) 1393 | { 1394 | full_size -= (thr->maxvalsize - 
sizeof(u64)*2 - 1); 1395 | fragment_index++; 1396 | } 1397 | full_size = page_size + sizeof(u64)*2 +1; 1398 | } 1399 | 1400 | pagesBuf[sizeof(u64)*2] = fragment_index; 1401 | data.mv_size = fragment_index == 0 ? full_size : thr->maxvalsize; 1402 | data.mv_data = pagesBuf; 1403 | 1404 | // fragment_index == 0 ? MDB_APPENDDUP : 0 1405 | if ((rc = mdb_cursor_put(mdb->cursorPages,&key,&data,0)) != MDB_SUCCESS) 1406 | { 1407 | // printf("Cursor put failed to pages %d",rc); 1408 | DBG("ERROR: cursor put failed=%d, datasize=%d",rc,full_size); 1409 | return SQLITE_ERROR; 1410 | } 1411 | 1412 | fragment_index--; 1413 | skipped = data.mv_size; 1414 | while (fragment_index >= 0) 1415 | { 1416 | DBG("Insert fragment %d",(int)fragment_index); 1417 | if (fragment_index == 0) 1418 | data.mv_size = full_size - skipped + sizeof(u64)*2 + 1; 1419 | else 1420 | data.mv_size = thr->maxvalsize; 1421 | data.mv_data = pagesBuf + skipped - (sizeof(u64)*2+1); 1422 | memcpy(pagesBuf + skipped - (sizeof(u64)*2+1), &pWal->inProgressTerm, sizeof(u64)); 1423 | memcpy(pagesBuf + skipped - (sizeof(u64)+1), &pWal->inProgressEvnum, sizeof(u64)); 1424 | pagesBuf[skipped-1] = fragment_index; 1425 | 1426 | if ((rc = mdb_cursor_put(mdb->cursorPages,&key,&data,0)) != MDB_SUCCESS) 1427 | { 1428 | DBG("ERROR: cursor secondary put failed: err=%d, datasize=%d, skipped=%d, frag=%d", 1429 | rc,full_size, skipped, (int)fragment_index); 1430 | return SQLITE_ERROR; 1431 | } 1432 | skipped += data.mv_size - sizeof(u64)*2 - 1; 1433 | fragment_index--; 1434 | } 1435 | 1436 | thr->pagesChanged++; 1437 | } 1438 | // printf(""); 1439 | // ** - Log DB: {<>, <>} 1440 | if (pWal->inProgressTerm > 0) 1441 | { 1442 | for(p=pList; p; p=p->pDirty) 1443 | { 1444 | u8 logKeyBuf[sizeof(u64)*3]; 1445 | 1446 | DBG("Inserting to log"); 1447 | 1448 | memcpy(logKeyBuf, &pWal->index, sizeof(u64)); 1449 | memcpy(logKeyBuf + sizeof(u64), &pWal->inProgressTerm, sizeof(u64)); 1450 | memcpy(logKeyBuf + sizeof(u64)*2, 
&pWal->inProgressEvnum, sizeof(u64)); 1451 | key.mv_size = sizeof(logKeyBuf); 1452 | key.mv_data = logKeyBuf; 1453 | 1454 | data.mv_size = sizeof(u32); 1455 | data.mv_data = &p->pgno; 1456 | 1457 | if (mdb_cursor_put(mdb->cursorLog,&key,&data,0) != MDB_SUCCESS) 1458 | { 1459 | // printf("Cursor put failed to log"); 1460 | DBG("ERROR: cursor put to log failed: %d",rc); 1461 | return SQLITE_ERROR; 1462 | } 1463 | 1464 | pWal->allPages++; 1465 | } 1466 | } 1467 | else 1468 | { 1469 | DBG("Skipping log"); 1470 | for(p=pList; p; p=p->pDirty) 1471 | pWal->allPages++; 1472 | } 1473 | /** - Info DB: {<>, <>} */ 1476 | { 1477 | if (isCommit) 1478 | { 1479 | DBG("Commit actor=%llu fct=%llu, fcev=%llu, lct=%llu, lcev=%llu, int=%llu, inev=%llu", 1480 | pWal->index, 1481 | pWal->firstCompleteTerm, pWal->firstCompleteEvnum, pWal->lastCompleteTerm, 1482 | pWal->lastCompleteEvnum, pWal->inProgressTerm,pWal->inProgressEvnum); 1483 | 1484 | #ifndef _TESTAPP_ 1485 | enif_mutex_lock(pWal->mtx); 1486 | #endif 1487 | pWal->lastCompleteTerm = pWal->inProgressTerm > 0 ? 1488 | pWal->inProgressTerm : pWal->lastCompleteTerm; 1489 | pWal->lastCompleteEvnum = pWal->inProgressEvnum > 0 ? 1490 | pWal->inProgressEvnum : pWal->lastCompleteEvnum; 1491 | if (pWal->firstCompleteTerm == 0) 1492 | { 1493 | pWal->firstCompleteTerm = pWal->inProgressTerm; 1494 | pWal->firstCompleteEvnum = pWal->inProgressEvnum; 1495 | } 1496 | pWal->inProgressTerm = pWal->inProgressEvnum = 0; 1497 | pWal->mxPage = pWal->mxPage > nTruncate ? 
pWal->mxPage : nTruncate; 1498 | // pWal->changed = 0; 1499 | thr->forceCommit = 1; 1500 | pCon->dirty = 0; 1501 | #ifndef _TESTAPP_ 1502 | enif_mutex_unlock(pWal->mtx); 1503 | #endif 1504 | DBG("cur mxpage=%u",pWal->mxPage); 1505 | } 1506 | else 1507 | { 1508 | // pWal->changed = 1; 1509 | pCon->dirty = 1; 1510 | } 1511 | thr->pagesChanged++; 1512 | 1513 | rc = storeinfo(pWal,0,0,NULL); 1514 | if (rc != SQLITE_OK) 1515 | return rc; 1516 | 1517 | track_time(5,thr); 1518 | } 1519 | 1520 | return SQLITE_OK; 1521 | } 1522 | 1523 | 1524 | 1525 | /* Copy pages from the log to the database file */ 1526 | int sqlite3WalCheckpoint( 1527 | Wal *pWal, /* Write-ahead log connection */ 1528 | sqlite3 *db, 1529 | int eMode, /* One of PASSIVE, FULL and RESTART */ 1530 | int (*xBusy)(void*), /* Function to call when busy */ 1531 | void *pBusyArg, /* Context argument for xBusyHandler */ 1532 | int sync_flags, /* Flags to sync db file with (or 0) */ 1533 | int nBuf, /* Size of buffer nBuf */ 1534 | u8 *zBuf, /* Temporary buffer to use */ 1535 | int *pnLog, /* OUT: Number of frames in WAL */ 1536 | int *pnCkpt /* OUT: Number of backfilled frames in WAL */ 1537 | ) 1538 | { 1539 | DBG("Checkpoint"); 1540 | return SQLITE_OK; 1541 | } 1542 | 1543 | 1544 | 1545 | /* Return the value to pass to a sqlite3_wal_hook callback, the 1546 | ** number of frames in the WAL at the point of the last commit since 1547 | ** sqlite3WalCallback() was called. If no commits have occurred since 1548 | ** the last call, then return 0. 1549 | */ 1550 | int sqlite3WalCallback(Wal *pWal) 1551 | { 1552 | return SQLITE_OK; 1553 | } 1554 | 1555 | /* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released) 1556 | ** by the pager layer on the database file. 1557 | */ 1558 | int sqlite3WalExclusiveMode(Wal *pWal, int op) 1559 | { 1560 | return SQLITE_OK; 1561 | } 1562 | 1563 | /* Return true if the argument is non-NULL and the WAL module is using 1564 | ** heap-memory for the wal-index. 
Otherwise, if the argument is NULL or the 1565 | ** WAL module is using shared-memory, return false. 1566 | */ 1567 | int sqlite3WalHeapMemory(Wal *pWal) 1568 | { 1569 | DBG("heap"); 1570 | return pWal != NULL; 1571 | } 1572 | 1573 | 1574 | 1575 | static u64 get8byte(u8* buf) 1576 | { 1577 | return ((u64)buf[0] << 56) + ((u64)buf[1] << 48) + ((u64)buf[2] << 40) + ((u64)buf[3] << 32) + 1578 | ((u64)buf[4] << 24) + ((u64)buf[5] << 16) + ((u64)buf[6] << 8) + buf[7]; 1579 | } 1580 | static void put8byte(u8* buf, u64 num) 1581 | { 1582 | buf[0] = num >> 56; 1583 | buf[1] = num >> 48; 1584 | buf[2] = num >> 40; 1585 | buf[3] = num >> 32; 1586 | buf[4] = num >> 24; 1587 | buf[5] = num >> 16; 1588 | buf[6] = num >> 8; 1589 | buf[7] = num; 1590 | } 1591 | 1592 | static int logdb_cmp(const MDB_val *a, const MDB_val *b) 1593 | { 1594 | i64 aActor,aEvterm,aEvnum,bActor,bEvterm,bEvnum; 1595 | int diff; 1596 | 1597 | memcpy(&aActor,a->mv_data,sizeof(i64)); 1598 | memcpy(&bActor,b->mv_data,sizeof(i64)); 1599 | diff = aActor - bActor; 1600 | if (diff == 0) 1601 | { 1602 | memcpy(&aEvterm, (u8*)a->mv_data+sizeof(i64), sizeof(i64)); 1603 | memcpy(&bEvterm, (u8*)b->mv_data+sizeof(i64), sizeof(i64)); 1604 | diff = aEvterm - bEvterm; 1605 | if (diff == 0) 1606 | { 1607 | memcpy(&aEvnum, (u8*)a->mv_data+sizeof(i64)*2, sizeof(i64)); 1608 | memcpy(&bEvnum, (u8*)b->mv_data+sizeof(i64)*2, sizeof(i64)); 1609 | return aEvnum - bEvnum; 1610 | } 1611 | return diff; 1612 | } 1613 | return diff; 1614 | } 1615 | 1616 | static int pagesdb_cmp(const MDB_val *a, const MDB_val *b) 1617 | { 1618 | u64 aActor; 1619 | u64 bActor; 1620 | u32 aPgno; 1621 | u32 bPgno; 1622 | int diff; 1623 | 1624 | memcpy(&aActor,a->mv_data,sizeof(u64)); 1625 | memcpy(&bActor,b->mv_data,sizeof(u64)); 1626 | diff = aActor - bActor; 1627 | if (diff == 0) 1628 | { 1629 | memcpy(&aPgno,(u8*)a->mv_data + sizeof(u64),sizeof(u32)); 1630 | memcpy(&bPgno,(u8*)b->mv_data + sizeof(u64),sizeof(u32)); 1631 | return aPgno - bPgno; 1632 | 
} 1633 | return diff; 1634 | } 1635 | 1636 | static int pagesdb_val_cmp(const MDB_val *a, const MDB_val *b) 1637 | { 1638 | u64 aEvterm,aEvnum; 1639 | u64 bEvterm,bEvnum; 1640 | u8 aCounter, bCounter; 1641 | int diff; 1642 | 1643 | memcpy(&aEvterm, a->mv_data, sizeof(u64)); 1644 | memcpy(&bEvterm, b->mv_data, sizeof(u64)); 1645 | diff = aEvterm - bEvterm; 1646 | if (diff == 0) 1647 | { 1648 | memcpy(&aEvnum, (u8*)a->mv_data+sizeof(u64), sizeof(u64)); 1649 | memcpy(&bEvnum, (u8*)b->mv_data+sizeof(u64), sizeof(u64)); 1650 | diff = aEvnum - bEvnum; 1651 | if (diff == 0) 1652 | { 1653 | aCounter = ((u8*)a->mv_data)[sizeof(u64)*2]; 1654 | bCounter = ((u8*)b->mv_data)[sizeof(u64)*2]; 1655 | return aCounter - bCounter; 1656 | } 1657 | return diff; 1658 | } 1659 | return diff; 1660 | } 1661 | 1662 | static MDB_txn* open_txn(mdbinf *data, int flags) 1663 | { 1664 | if (mdb_txn_begin(data->env, NULL, flags, &data->txn) != MDB_SUCCESS) 1665 | return NULL; 1666 | // if (mdb_dbi_open(data->txn, "info", MDB_INTEGERKEY, &data->infodb) != MDB_SUCCESS) 1667 | // return NULL; 1668 | // if (mdb_dbi_open(data->txn, "actors", 0, &data->actorsdb) != MDB_SUCCESS) 1669 | // return NULL; 1670 | // if (mdb_dbi_open(data->txn, "log", MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &data->logdb) != MDB_SUCCESS) 1671 | // return NULL; 1672 | // if (mdb_dbi_open(data->txn, "pages", MDB_DUPSORT, &data->pagesdb) != MDB_SUCCESS) 1673 | // return NULL; 1674 | if (mdb_set_compare(data->txn, data->logdb, logdb_cmp) != MDB_SUCCESS) 1675 | return NULL; 1676 | if (mdb_set_compare(data->txn, data->pagesdb, pagesdb_cmp) != MDB_SUCCESS) 1677 | return NULL; 1678 | if (mdb_set_dupsort(data->txn, data->pagesdb, pagesdb_val_cmp) != MDB_SUCCESS) 1679 | return NULL; 1680 | if (mdb_cursor_open(data->txn, data->logdb, &data->cursorLog) != MDB_SUCCESS) 1681 | return NULL; 1682 | if (mdb_cursor_open(data->txn, data->pagesdb, &data->cursorPages) != MDB_SUCCESS) 1683 | return NULL; 1684 | if (mdb_cursor_open(data->txn, 
data->infodb, &data->cursorInfo) != MDB_SUCCESS) 1685 | return NULL; 1686 | 1687 | return data->txn; 1688 | } 1689 | 1690 | sqlite3_file *sqlite3WalFile(Wal *pWal) 1691 | { 1692 | return NULL; 1693 | } 1694 | --------------------------------------------------------------------------------