├── .gitattributes ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── artree ├── artree.c ├── artree.h ├── artree_cursor.c ├── artree_delete.c ├── artree_find.c ├── artree_insert.c └── artree_uniq.c ├── arxiv.1009.2764.pdf ├── base64.c ├── base64.h ├── btree1 ├── btree1.c ├── btree1.h ├── btree1_cursor.c ├── btree1_delete.c ├── btree1_find.c ├── btree1_insert.c ├── btree1_util.c └── readme.md ├── btree2 ├── btree2.c ├── btree2.h ├── btree2_cursor.c ├── btree2_delete.c ├── btree2_find.c ├── btree2_insert.c ├── btree2_skip.c ├── btree2_slot.h ├── btree2_util.c └── readme.md ├── build ├── build.bat ├── build.osx ├── build.wsl ├── database.sln ├── database.vcxproj ├── database.vcxproj.filters ├── db.h ├── db_api.c ├── db_api.h ├── db_arena.c ├── db_arena.h ├── db_cputime.c ├── db_cursor.c ├── db_cursor.h ├── db_drop.c ├── db_error.h ├── db_frame.c ├── db_frame.h ├── db_handle.c ├── db_handle.h ├── db_index.h ├── db_iterator.c ├── db_iterator.h ├── db_malloc.c ├── db_malloc.h ├── db_map.c ├── db_map.h ├── db_object.c ├── db_object.h ├── db_params.c ├── db_redblack.c ├── db_redblack.h ├── db_skiplist.c ├── db_skiplist.h ├── implementation guide ├── mvcc.h ├── mvcc_dbapi.c ├── mvcc_dbapi.h ├── mvcc_dbdoc.c ├── mvcc_dbdoc.h ├── mvcc_dbidx.c ├── mvcc_dbidx.h ├── mvcc_dbssn.h ├── mvcc_dbssn1.c ├── mvcc_dbssn2.c ├── mvcc_dbssn3.c ├── mvcc_dbssn4.c ├── mvcc_dbtxn.c ├── mvcc_dbtxn.h ├── oldbtree1.h ├── standalone.c ├── testfiles ├── test1 ├── test1.bat ├── test1.wsl ├── test2.bat ├── test2.wsl ├── test3.bat ├── test3.wsl ├── test4.bat ├── test4.wsl ├── test5.bat ├── test5.wsl ├── test6.bat ├── test6.wsl ├── test7.bat ├── test7.wsl └── test8.bat └── vcvars.bat /.gitattributes: -------------------------------------------------------------------------------- 1 | * text eol=lf 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore 
Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | *.swp 10 | *.exe 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | [Xx]64/ 21 | [Xx]86/ 22 | [Bb]uild/ 23 | bld/ 24 | [Bb]in/ 25 | [Oo]bj/ 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # DNX 46 | project.lock.json 47 | artifacts/ 48 | 49 | *_i.c 50 | *_p.c 51 | *_i.h 52 | *.ilk 53 | *.meta 54 | *.obj 55 | *.pch 56 | *.pdb 57 | *.pgc 58 | *.pgd 59 | *.rsp 60 | *.sbr 61 | *.tlb 62 | *.tli 63 | *.tlh 64 | *.tmp 65 | *.tmp_proj 66 | *.log 67 | *.vspscc 68 | *.vssscc 69 | .builds 70 | *.pidb 71 | *.svclog 72 | *.scc 73 | 74 | # Chutzpah Test files 75 | _Chutzpah* 76 | 77 | # Visual C++ cache files 78 | ipch/ 79 | *.aps 80 | *.ncb 81 | *.opendb 82 | *.opensdf 83 | *.sdf 84 | *.cachefile 85 | *.VC.db 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | 
*.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | 145 | # TODO: Un-comment the next line if you do not want to checkin 146 | # your web deploy settings because they may include unencrypted 147 | # passwords 148 | #*.pubxml 149 | *.publishproj 150 | 151 | # NuGet Packages 152 | *.nupkg 153 | # The packages folder can be ignored because of Package Restore 154 | **/packages/* 155 | # except build/, which is used as an MSBuild target. 156 | !**/packages/build/ 157 | # Uncomment if necessary however generally it will be regenerated when needed 158 | #!**/packages/repositories.config 159 | # NuGet v3's project.json files produces more ignoreable files 160 | *.nuget.props 161 | *.nuget.targets 162 | 163 | # Microsoft Azure Build Output 164 | csx/ 165 | *.build.csdef 166 | 167 | # Microsoft Azure Emulator 168 | ecf/ 169 | rcf/ 170 | 171 | # Microsoft Azure ApplicationInsights config file 172 | ApplicationInsights.config 173 | 174 | # Windows Store app package directory 175 | AppPackages/ 176 | BundleArtifacts/ 177 | 178 | # Visual Studio cache files 179 | # files ending in .cache can be ignored 180 | *.[Cc]ache 181 | # but keep track of directories ending in .cache 182 | !*.[Cc]ache/ 183 | 184 | # Others 185 | ClientBin/ 186 | [Ss]tyle[Cc]op.* 187 | ~$* 188 | *~ 189 | *.dbmdl 190 | *.dbproj.schemaview 191 | *.pfx 192 | *.publishsettings 193 | node_modules/ 194 | orleans.codegen.cs 195 | 196 | # RIA/Silverlight projects 197 | Generated_Code/ 198 | 199 | # Backup & report files 
from converting an old project file 200 | # to a newer Visual Studio version. Backup files are not needed, 201 | # because we have git ;-) 202 | _UpgradeReport_Files/ 203 | Backup*/ 204 | UpgradeLog*.XML 205 | UpgradeLog*.htm 206 | 207 | # SQL Server files 208 | *.mdf 209 | *.ldf 210 | 211 | # Business Intelligence projects 212 | *.rdl.data 213 | *.bim.layout 214 | *.bim_*.settings 215 | 216 | # Microsoft Fakes 217 | FakesAssemblies/ 218 | 219 | # GhostDoc plugin setting file 220 | *.GhostDoc.xml 221 | 222 | # Node.js Tools for Visual Studio 223 | .ntvs_analysis.dat 224 | 225 | # Visual Studio 6 build log 226 | *.plg 227 | 228 | # Visual Studio 6 workspace options file 229 | *.opt 230 | 231 | # Visual Studio LightSwitch build output 232 | **/*.HTMLClient/GeneratedArtifacts 233 | **/*.DesktopClient/GeneratedArtifacts 234 | **/*.DesktopClient/ModelManifest.xml 235 | **/*.Server/GeneratedArtifacts 236 | **/*.Server/ModelManifest.xml 237 | _Pvt_Extensions 238 | 239 | # LightSwitch generated files 240 | GeneratedArtifacts/ 241 | ModelManifest.xml 242 | 243 | # Paket dependency manager 244 | .paket/paket.exe 245 | 246 | # FAKE - F# Make 247 | .fake/ 248 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "mutex"] 2 | path = mutex 3 | url = https://github.com/malbrain/mutex 4 | [submodule "rwlock"] 5 | path = rwlock 6 | url = https://github.com/malbrain/rwlock 7 | [submodule "Hi-Performance-Timestamps"] 8 | path = Hi-Performance-Timestamps 9 | url = https://github.com/malbrain/Hi-Performance-Timestamps 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, Karl Malbrain 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /artree/artree.c: -------------------------------------------------------------------------------- 1 | #include "artree.h" 2 | 3 | // initialize ARTree 4 | 5 | DbStatus artInit(Handle *hndl, Params *params) { 6 | DbMap *artMap = MapAddr(hndl); 7 | ArtIndex *artIndex = artindex(artMap); 8 | 9 | artIndex->dbIndex->delimFlds = artMap->arenaDef->params[IdxKeyFlds].charVal; 10 | artIndex->dbIndex->uniqueKeys = 11 | artMap->arenaDef->params[IdxKeyUnique].boolVal; 12 | 13 | artMap->arena->type[0] = Hndl_artIndex; 14 | return DB_OK; 15 | } -------------------------------------------------------------------------------- /artree/artree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../base64.h" 3 | #include "../db.h" 4 | 5 | #include "../db_arena.h" 6 | #include "../db_map.h" 7 | #include "../db_api.h" 8 | #include "../db_object.h" 9 | #include "../db_handle.h" 10 | #include "../db_cursor.h" 11 | #include "../db_frame.h" 12 | #include 13 | 14 | #define MAX_cursor 4096 15 | 16 | // Artree interior nodes 17 | 18 | enum ARTNodeType { 19 | UnusedSlot = 0, // 0: slot is not yet in use 20 | Array4, // 1: node contains 4 radix slots 21 | Array14, // 2: node contains 14 radix slots 22 | Array64, // 3: node contains 64 radix slots 23 | Array256, // 4: node contains 256 radix slots 24 | FldEnd, // 5: node ends a binary string field 25 | KeyEnd, // 6: node ends the end of a key value 26 | SuffixEnd, // 7: node ends a suffix string 27 | SpanNode, // 8: node contains up to 8 key bytes 28 | MaxARTType = SpanNode + 17 // 8-24: node spans up to 256 bytes 29 | }; 30 | 31 | /** 32 | * field value ends for binary strings option 33 | */ 34 | 35 | typedef struct { 36 | DbAddr sameFld[1]; // more bytes from the same field 37 | DbAddr nextFld[1]; // end this field and start next 38 | } ARTFldEnd; 39 | 40 | /** 41 | * key is a prefix of another 
longer key 42 | */ 43 | 44 | typedef struct { 45 | DbAddr next[1]; 46 | DbAddr suffix[1]; // end key and continue w/ set of unique suffix strings 47 | } ARTKeyEnd; 48 | 49 | /** 50 | * radix node with four slots and their key bytes 51 | */ 52 | 53 | typedef struct { 54 | uint8_t alloc; 55 | uint8_t keys[4]; 56 | uint8_t filler[3]; 57 | DbAddr radix[4]; 58 | } ARTNode4; 59 | 60 | /** 61 | * radix node with fourteen slots and their key bytes 62 | */ 63 | 64 | typedef struct { 65 | uint16_t alloc; 66 | uint8_t keys[14]; 67 | DbAddr radix[14]; 68 | } ARTNode14; 69 | 70 | /** 71 | * radix node with sixty-four slots and a 256 key byte array 72 | */ 73 | 74 | typedef struct { 75 | uint64_t alloc; 76 | uint8_t keys[256]; 77 | DbAddr radix[64]; 78 | } ARTNode64; 79 | 80 | /** 81 | * radix node all 256 slots 82 | */ 83 | 84 | typedef struct { 85 | DbAddr radix[256]; 86 | } ARTNode256; 87 | 88 | /** 89 | * span node containing up to 8 consecutive key bytes 90 | * span nodes are used to compress linear chains of key bytes 91 | */ 92 | 93 | typedef struct { 94 | DbAddr next[1]; // next node after span 95 | uint8_t bytes[8]; 96 | } ARTSpan; 97 | 98 | /** 99 | * Index arena definition 100 | */ 101 | 102 | typedef struct { 103 | DbIndex dbIndex[1]; 104 | DbAddr root[1]; // root of the arttree 105 | } ArtIndex; 106 | 107 | typedef struct { 108 | DbAddr *addr; // tree addr of slot 109 | DbAddr slot[1]; // slot that points to node 110 | uint16_t off; // offset within key 111 | uint16_t lastFld; // offset of current field 112 | int16_t ch; // character of key 113 | uint16_t startFld; // flag to start field 114 | } CursorStack; 115 | 116 | typedef struct { 117 | DbCursor base[1]; 118 | uint16_t depth; // current depth of cursor 119 | uint16_t fldLen; // length remaining in current field 120 | char delimFlds; // keys have binary fields 121 | char inSuffix; // binaryFlds in suffix string 122 | uint8_t key[MAX_key]; // current cursor key 123 | CursorStack stack[MAX_cursor]; // cursor 
stack 124 | } ArtCursor; 125 | 126 | typedef struct { 127 | DbAddr *slot; 128 | DbAddr *prev; 129 | DbAddr oldSlot[1]; 130 | DbAddr newSlot[1]; 131 | 132 | DbMap *idxMap; 133 | DbStatus stat; 134 | Handle *index; 135 | uint8_t *key; 136 | 137 | uint16_t keyLen; // length of the key 138 | uint16_t off; // progress down the key bytes 139 | uint16_t lastFld; // previous field start 140 | uint16_t fldLen; // length remaining in current field 141 | uint8_t ch; // current key character 142 | char delimFlds; // keys have binary fields 143 | uint8_t restart; // restart insert from beginning 144 | } InsertParam; 145 | 146 | #define artindex(map) ((ArtIndex *)(map->arena + 1)) 147 | 148 | DbStatus artNewCursor(DbCursor *cursor, DbMap *map); 149 | DbStatus artReturnCursor(DbCursor *dbCursor, DbMap *map); 150 | 151 | DbStatus artLeftKey(DbCursor *cursor, DbMap *map); 152 | DbStatus artRightKey(DbCursor *cursor, DbMap *map); 153 | 154 | DbStatus artFindKey( DbCursor *dbCursor, DbMap *map, uint8_t *key, uint16_t keyLen, uint16_t suffixLen); 155 | DbStatus artNextKey(DbCursor *dbCursor, DbMap *map); 156 | DbStatus artPrevKey(DbCursor *dbCursor, DbMap *map); 157 | 158 | DbStatus artInit(Handle *hndl, Params *params); 159 | DbStatus artDeleteKey (Handle *hndl, uint8_t *key, uint16_t keyLen, uint16_t suffixLen); 160 | DbStatus artInsertKey (Handle *hndl, DbKeyValue *kv, uint8_t lvl); 161 | DbStatus artInsertUniq (Handle *hndl, uint8_t *key, uint16_t keyLen, uint16_t suffixLen, UniqCbFcn *fcn, bool *defer); 162 | DbStatus artEvalUniq( DbMap *map, uint8_t *key, uint16_t keyLen, uint16_t suffixLen, UniqCbFcn *evalFcn); 163 | 164 | uint64_t artAllocateNode(Handle *index, int type, uint32_t size); 165 | 166 | bool artInsertParam (InsertParam *p); 167 | -------------------------------------------------------------------------------- /artree/artree_delete.c: -------------------------------------------------------------------------------- 1 | #include "artree.h" 2 | 3 | typedef enum { 4 | 
ContinueSearch, 5 | EndSearch, 6 | RetrySearch, 7 | RestartSearch, 8 | ErrorSearch 9 | } ReturnState; 10 | 11 | 12 | DbStatus artDeleteKey(Handle *index, uint8_t *key, uint16_t keyLen, uint16_t suffixLen) { 13 | uint8_t tmpCursor[sizeof(DbCursor) + sizeof(ArtCursor)]; 14 | DbMap *idxMap = MapAddr(index); 15 | ReturnState rt = ErrorSearch; 16 | DbCursor *dbCursor; 17 | ArtCursor *cursor; 18 | DbAddr newSlot; 19 | DbStatus stat; 20 | uint32_t bit; 21 | uint8_t ch; 22 | 23 | memset(tmpCursor, 0, sizeof(tmpCursor)); 24 | 25 | dbCursor = (DbCursor *)tmpCursor; 26 | cursor = (ArtCursor *)(tmpCursor + sizeof(DbCursor)); 27 | 28 | dbCursor->key = cursor->key; 29 | 30 | if ((stat = artFindKey(dbCursor, idxMap, key, keyLen, suffixLen))) 31 | return stat; 32 | 33 | // we take the trie nodes in the cursor stack 34 | // and go through them backwards to remove empties. 35 | 36 | if (cursor->depth) 37 | if (cursor->stack[cursor->depth - 1].addr->type == KeyEnd) { 38 | while (cursor->depth) { 39 | CursorStack *stack = &cursor->stack[--cursor->depth]; 40 | uint32_t pass = 0; 41 | bool retry = true; 42 | 43 | ch = (uint8_t)stack->ch; 44 | 45 | // wait if we run into a dead slot 46 | do { 47 | if (pass) 48 | yield(); 49 | else 50 | pass = 1; 51 | 52 | // obtain write lock on the node 53 | 54 | lockLatch(stack->addr->latch); 55 | newSlot.bits = stack->addr->bits; 56 | 57 | if ((retry = newSlot.kill)) 58 | unlockLatch(stack->addr->latch); 59 | } while (retry); 60 | 61 | switch (newSlot.type < SpanNode ? 
newSlot.type : SpanNode) { 62 | case UnusedSlot: { 63 | continue; 64 | } 65 | 66 | case FldEnd: { 67 | ARTFldEnd* fldEndNode = getObj(idxMap, *stack->addr); 68 | stack->addr->bits = fldEndNode->sameFld->bits; 69 | fldEndNode->nextFld->bits = 0; 70 | 71 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) { 72 | if (stack->addr->type) 73 | rt = EndSearch; 74 | else 75 | continue; 76 | } else 77 | rt = ErrorSearch; 78 | 79 | break; 80 | } 81 | 82 | /* case KeyUniq: { 83 | ARTKeyUniq* keyUniqNode = getObj(map, *stack->addr); 84 | 85 | if (stack->off == uniqueLen) { 86 | stack->addr->bits = keyUniqNode->dups->bits; 87 | keyUniqNode->dups->bits = 0; 88 | } else { 89 | stack->addr->bits = keyUniqNode->next->bits; 90 | keyUniqNode->next->bits = 0; 91 | } 92 | 93 | if(addSlotToFrame(map, listHead(index,newSlot.type), listWait(index,newSlot.type), newSlot.bits)) { 94 | if (stack->addr->type) 95 | rt = EndSearch; 96 | else 97 | continue; 98 | } else 99 | rt = ErrorSearch; 100 | 101 | break; 102 | } 103 | */ 104 | case KeyEnd: { 105 | if (newSlot.addr) { // is there a continuation? 
106 | ARTKeyEnd* keyEndNode = getObj(idxMap, *stack->addr); 107 | stack->addr->bits = keyEndNode->next->bits; 108 | keyEndNode->next->bits = 0; 109 | 110 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) { 111 | if (stack->addr->type) 112 | rt = EndSearch; 113 | else 114 | continue; 115 | } else 116 | rt = ErrorSearch; 117 | } 118 | 119 | break; 120 | } 121 | 122 | case SpanNode: { 123 | stack->addr->bits = 0; 124 | 125 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) 126 | continue; 127 | 128 | rt = ErrorSearch; 129 | break; 130 | } 131 | 132 | case Array4: { 133 | ARTNode4 *node = getObj(idxMap, *stack->addr); 134 | 135 | for (bit = 0; bit < 4; bit++) { 136 | if (node->alloc & (1 << bit)) 137 | if (ch == node->keys[bit]) 138 | break; 139 | } 140 | 141 | if (bit == 4) { 142 | rt = EndSearch; // key byte not found 143 | break; 144 | } 145 | 146 | // we are not the last entry in the node? 147 | 148 | node->alloc &= ~(1 << bit); 149 | 150 | if (node->alloc) { 151 | rt = EndSearch; 152 | break; 153 | } 154 | 155 | stack->addr->bits = 0; 156 | 157 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) 158 | continue; 159 | 160 | rt = ErrorSearch; 161 | break; 162 | } 163 | 164 | case Array14: { 165 | ARTNode14 *node = getObj(idxMap, *stack->addr); 166 | 167 | for (bit = 0; bit < 14; bit++) { 168 | if (node->alloc & (1 << bit)) 169 | if (ch == node->keys[bit]) 170 | break; 171 | } 172 | 173 | if (bit == 14) { 174 | rt = EndSearch; // key byte not found 175 | break; 176 | } 177 | 178 | // we are not the last entry in the node? 
179 | 180 | node->alloc &= ~(1 << bit); 181 | 182 | if (node->alloc) { 183 | rt = EndSearch; 184 | break; 185 | } 186 | 187 | stack->addr->bits = 0; 188 | 189 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) 190 | continue; 191 | 192 | rt = ErrorSearch; 193 | break; 194 | } 195 | 196 | case Array64: { 197 | ARTNode64 *node = getObj(idxMap, *stack->addr); 198 | bit = node->keys[ch]; 199 | 200 | if (bit == 0xff) { 201 | rt = EndSearch; 202 | break; 203 | } 204 | 205 | node->keys[ch] = 0xff; 206 | node->alloc &= ~(1ULL << bit); 207 | 208 | if (node->alloc) { 209 | rt = EndSearch; 210 | break; 211 | } 212 | 213 | stack->addr->bits = 0; 214 | 215 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) 216 | continue; 217 | 218 | rt = ErrorSearch; 219 | break; 220 | } 221 | 222 | case Array256: { 223 | ARTNode256 *node = getObj(idxMap, *stack->addr); 224 | bit = ch; 225 | 226 | // is radix slot empty? 227 | if (!node->radix[bit].type) { 228 | rt = EndSearch; 229 | break; 230 | } 231 | 232 | // was this the last used slot? 233 | if (--stack->addr->nslot) { 234 | rt = EndSearch; 235 | break; 236 | } 237 | 238 | // remove the slot 239 | stack->addr->bits = 0; 240 | 241 | if(addSlotToFrame(idxMap, idxMap->arena->usrFrame[newSlot.type].freeFrame, newSlot.bits)) 242 | continue; 243 | 244 | rt = ErrorSearch; 245 | break; 246 | } 247 | } // end switch 248 | 249 | unlockLatch(stack->addr->latch); 250 | break; 251 | 252 | } // end while 253 | } // end if 254 | 255 | return rt == EndSearch ? 
DB_OK : DB_ERROR_deletekey; 256 | } 257 | -------------------------------------------------------------------------------- /artree/artree_find.c: -------------------------------------------------------------------------------- 1 | #include "artree.h" 2 | 3 | DbStatus artFindKey( DbCursor *dbCursor, DbMap *map, uint8_t *findKey, uint16_t keyLen, uint16_t suffix) { 4 | ArtCursor *cursor = (ArtCursor *)dbCursor; 5 | uint32_t fldLen = 0, idx, offset = 0, spanMax; 6 | bool binaryFlds = map->arenaDef->params[IdxKeyFlds].charVal; 7 | CursorStack* stack = NULL; 8 | uint8_t *key = findKey; 9 | DbAddr *slot; 10 | ArtIndex *artIdx; 11 | uint16_t lastFld = 0; 12 | 13 | artIdx = artindex(map); 14 | 15 | dbCursor->keyLen = 0; 16 | cursor->depth = 0; 17 | 18 | // loop through the key bytes 19 | // 20 | slot = artIdx->root; 21 | 22 | if (binaryFlds && !dbCursor->keyLen) { 23 | fldLen = key[offset] << 8 | key[offset + 1]; 24 | dbCursor->keyLen = 2; 25 | offset += 2; 26 | } 27 | 28 | while (offset < keyLen) { 29 | if (cursor->depth < MAX_cursor) 30 | stack = cursor->stack + cursor->depth++; 31 | else 32 | return DB_ERROR_cursoroverflow; 33 | 34 | stack->slot->bits = slot->bits; 35 | stack->off = dbCursor->keyLen; 36 | stack->ch = key[offset]; 37 | stack->addr = slot; 38 | 39 | switch (slot->type < SpanNode ? slot->type : SpanNode) { 40 | case FldEnd: { 41 | // this case only occurs with binaryFlds 42 | 43 | ARTFldEnd *fldEndNode = getObj(map, *slot); 44 | 45 | // do we need to finish the search key field? 
46 | 47 | if (fldLen) { 48 | slot = fldEndNode->sameFld; 49 | stack->ch = 256; 50 | continue; 51 | } 52 | 53 | if (cursor) { 54 | lastFld = dbCursor->keyLen; 55 | fldLen = key[offset] << 8 | key[offset + 1]; 56 | dbCursor->keyLen += 2; 57 | stack->ch = 256; 58 | offset += 2; 59 | } 60 | 61 | slot = fldEndNode->nextFld; 62 | continue; 63 | } 64 | 65 | /* case KeyUniq: { 66 | ARTKeyUniq* keyUniqNode = getObj(map, *slot); 67 | 68 | if (stack->off == uniqueLen) 69 | slot = keyUniqNode->dups; 70 | else 71 | slot = keyUniqNode->next; 72 | 73 | if (cursor) 74 | stack->ch = 256; 75 | 76 | continue; 77 | } 78 | */ 79 | case KeyEnd: { 80 | if (slot->addr) { // do key bytes fork here? 81 | ARTKeyEnd* keyEndNode = getObj(map, *slot); 82 | slot = keyEndNode->next; 83 | 84 | if (cursor) 85 | stack->ch = 256; 86 | 87 | continue; 88 | } 89 | 90 | // otherwise our key isn't here 91 | 92 | break; 93 | } 94 | 95 | case SpanNode: { 96 | ARTSpan* spanNode = getObj(map, *slot); 97 | uint32_t amt = keyLen - offset; 98 | int diff; 99 | 100 | spanMax = slot->nbyte + 1; 101 | 102 | if (amt > spanMax) 103 | amt = spanMax; 104 | 105 | diff = memcmp(key + offset, spanNode->bytes, amt); 106 | 107 | // does the key end inside the span? 
108 | 109 | if (spanMax > amt || diff) 110 | break; 111 | 112 | // continue to the next slot 113 | 114 | dbCursor->keyLen += spanMax; 115 | slot = spanNode->next; 116 | offset += spanMax; 117 | 118 | if (binaryFlds) 119 | fldLen -= spanMax; 120 | 121 | continue; 122 | } 123 | 124 | case Array4: { 125 | ARTNode4 *node = getObj(map, *slot); 126 | 127 | // simple loop comparing bytes 128 | 129 | for (idx = 0; idx < 4; idx++) 130 | if (node->alloc & (1 << idx)) 131 | if (key[offset] == node->keys[idx]) 132 | break; 133 | 134 | if (idx < 4) { 135 | slot = node->radix + idx; 136 | dbCursor->keyLen++; 137 | 138 | if (binaryFlds) 139 | fldLen--; 140 | 141 | offset++; 142 | continue; 143 | } 144 | 145 | // key byte not found 146 | 147 | break; 148 | } 149 | 150 | case Array14: { 151 | ARTNode14 *node = getObj(map, *slot); 152 | 153 | // simple loop comparing bytes 154 | 155 | for (idx = 0; idx < 14; idx++) 156 | if (node->alloc & (1 << idx)) 157 | if (key[offset] == node->keys[idx]) 158 | break; 159 | 160 | if (idx < 14) { 161 | slot = node->radix + idx; 162 | dbCursor->keyLen++; 163 | 164 | if (binaryFlds) 165 | fldLen--; 166 | 167 | offset++; 168 | continue; 169 | } 170 | 171 | // key byte not found 172 | 173 | break; 174 | } 175 | 176 | case Array64: { 177 | ARTNode64* node = getObj(map, *slot); 178 | idx = node->keys[key[offset]]; 179 | 180 | if (idx < 0xff && (node->alloc & (1ULL << idx))) { 181 | slot = node->radix + idx; 182 | dbCursor->keyLen++; 183 | 184 | if (binaryFlds) 185 | fldLen--; 186 | 187 | offset++; 188 | continue; 189 | } 190 | 191 | // key byte not found 192 | 193 | break; 194 | } 195 | 196 | case Array256: { 197 | ARTNode256* node = getObj(map, *slot); 198 | idx = key[offset]; 199 | 200 | if (node->radix[idx].type) { 201 | slot = node->radix + idx; 202 | dbCursor->keyLen++; 203 | 204 | if (binaryFlds) 205 | fldLen--; 206 | 207 | offset++; 208 | continue; 209 | } 210 | 211 | // key byte not found 212 | 213 | break; 214 | } 215 | 216 | case UnusedSlot: { 
217 | dbCursor->state = CursorRightEof; 218 | return DB_OK; 219 | } 220 | } // end switch 221 | 222 | break; 223 | } // end while (offset < keylen) 224 | 225 | memcpy (cursor->key, key, dbCursor->keyLen); 226 | 227 | // did we end on a complete key? 228 | 229 | if (slot->type == KeyEnd) 230 | dbCursor->state = CursorPosAt; 231 | else 232 | dbCursor->state = CursorPosBefore; 233 | 234 | // add the terminal node to the cursor 235 | 236 | if (cursor->depth < MAX_cursor) 237 | stack = cursor->stack + cursor->depth++; 238 | else 239 | return DB_ERROR_cursoroverflow; 240 | 241 | stack->slot->bits = slot->bits; 242 | stack->off = dbCursor->keyLen; 243 | stack->addr = slot; 244 | stack->ch = -1; 245 | return DB_OK; 246 | } 247 | -------------------------------------------------------------------------------- /artree/artree_uniq.c: -------------------------------------------------------------------------------- 1 | #include "artree.h" 2 | #include 3 | /* 4 | bool evalUniq(DbMap *map, ARTKeyUniq *keyUniqNode, UniqCbFcn *evalFcn); 5 | 6 | // insert unique key 7 | // clear defer if unique 8 | 9 | DbStatus artInsertUniq( Handle *index, void *key, uint32_t uniqueLen, uint32_t suffixLen, UniqCbFcn *evalFcn, bool *defer) { 10 | DbMap idxMap = MapAddr(index); 11 | volatile DbAddr *uniq, slot; 12 | ARTKeyUniq *keyUniqNode; 13 | bool pass = false; 14 | InsertParam p[1]; 15 | ArtIndex *artIdx; 16 | 17 | artIdx = artindex(idxMap); 18 | 19 | memset(p, 0, sizeof(p)); 20 | p->binaryFlds = idxMap->arenaDef->params[IdxKeyFlds].boolVal; 21 | 22 | do { 23 | p->slot = artIdx->root; 24 | p->keyLen = uniqueLen; 25 | p->restart = false; 26 | p->index = index; 27 | p->fldLen = 0; 28 | p->key = key; 29 | p->off = 0; 30 | 31 | // we encountered a dead node 32 | 33 | if (pass) { 34 | pass = false; 35 | yield(); 36 | } 37 | 38 | if (!artInsertParam(p)) 39 | continue; 40 | 41 | // latch the terminal node 42 | 43 | lockLatch(p->slot->latch); 44 | 45 | // end the uniq portion of the path with a KeyUniq 
46 | 47 | if (p->slot->type == KeyUniq) { 48 | keyUniqNode = getObj(idxMap, *p->slot); 49 | slot.bits = p->slot->bits; 50 | break; 51 | } 52 | 53 | if ((slot.bits = artAllocateNode(index, KeyUniq, sizeof(ARTKeyUniq)))) { 54 | keyUniqNode = getObj(idxMap, slot); 55 | keyUniqNode->next->bits = p->slot->bits & ~ADDR_MUTEX_SET; 56 | p->slot->bits = slot.bits | ADDR_MUTEX_SET; 57 | break; 58 | } 59 | 60 | unlockLatch(p->slot->latch); 61 | return DB_ERROR_outofmemory; 62 | } while (!p->stat && (pass = p->restart)); 63 | 64 | if (p->stat) 65 | return p->stat; 66 | 67 | // remember our locked KeyUniq node 68 | 69 | uniq = p->slot; 70 | 71 | // evaluate uniqueness violation 72 | 73 | if (!keyUniqNode->dups->bits) 74 | *defer = false; // no other keys 75 | else if (evalUniq(idxMap, keyUniqNode, evalFcn)) 76 | *defer = false; // no conflicting keys 77 | else if (!*defer) 78 | return DB_ERROR_unique_key_constraint; 79 | 80 | // install the suffix key bytes 81 | 82 | p->keyLen += suffixLen; 83 | pass = false; 84 | 85 | do { 86 | p->slot = keyUniqNode->dups; 87 | p->off = uniqueLen; 88 | 89 | if (pass) { 90 | pass = false; 91 | yield(); 92 | } 93 | 94 | if (!artInsertParam(p)) 95 | continue; 96 | 97 | // duplicate key? 
98 | 99 | if (p->slot->type == KeyEnd) 100 | break; 101 | 102 | // if not, splice in a KeyEnd node to end the key 103 | 104 | lockLatch(p->slot->latch); 105 | 106 | // check duplicate again after getting lock 107 | 108 | if (p->slot->type == KeyEnd) { 109 | unlockLatch(p->slot->latch); 110 | break; 111 | } 112 | 113 | // end the key path with a zero-addr KeyEnd 114 | 115 | if (p->slot->type == UnusedSlot) { 116 | p->slot->bits = (uint64_t)KeyEnd << TYPE_SHIFT; 117 | break; 118 | } 119 | 120 | // splice in a new KeyEnd node 121 | 122 | if ((slot.bits = artAllocateNode(index, KeyEnd, sizeof(ARTKeyEnd)))) { 123 | ARTKeyEnd *keyEndNode = getObj(idxMap, slot); 124 | keyEndNode->next->bits = p->slot->bits & ~ADDR_MUTEX_SET; 125 | 126 | p->slot->bits = slot.bits; 127 | break; 128 | } 129 | 130 | unlockLatch(p->slot->latch); 131 | return DB_ERROR_outofmemory; 132 | } while (!p->stat && (pass = p->restart)); 133 | 134 | unlockLatch(uniq->latch); 135 | return DB_OK; 136 | } 137 | 138 | DbStatus artEvalUniq( DbMap *map, void *key, uint32_t uniqueLen, uint32_t suffix, UniqCbFcn *evalFcn) { 139 | uint8_t area[sizeof(ArtCursor) + sizeof(DbCursor)]; 140 | ARTKeyUniq *keyUniqNode; 141 | volatile DbAddr *uniq; 142 | DbCursor *dbCursor; 143 | CursorStack* stack; 144 | ArtCursor *cursor; 145 | ArtIndex *artIdx; 146 | DbStatus stat; 147 | bool isDup; 148 | 149 | artIdx = artindex(map); 150 | 151 | dbCursor = (DbCursor *)(area); 152 | memset(dbCursor, 0, sizeof(DbCursor)); 153 | 154 | dbCursor->state = CursorPosAt; 155 | 156 | cursor = (ArtCursor *)dbCursor; 157 | memset(cursor, 0, offsetof(ArtCursor, key)); 158 | dbCursor->key = cursor->key; 159 | 160 | stack = &cursor->stack[cursor->depth++]; 161 | stack->slot->bits = artIdx->root->bits; 162 | stack->addr = artIdx->root; 163 | stack->lastFld = 0; 164 | stack->off = 0; 165 | stack->ch = -1; 166 | 167 | if ((stat = artFindKey(dbCursor, map, key, uniqueLen, suffixLen))) 168 | return stat; 169 | 170 | // see if we ended up on the KeyUniq 
node 171 | 172 | stack = &cursor->stack[cursor->depth - 1]; 173 | 174 | // latch and remember the terminal node 175 | 176 | lockLatch(stack->addr->latch); 177 | uniq = stack->addr; 178 | 179 | if (stack->addr->type != KeyUniq) { 180 | unlockLatch(stack->addr->latch); 181 | return DB_OK; 182 | } 183 | 184 | // reset cursor to enumerate duplicate keys 185 | 186 | keyUniqNode = getObj(map, *stack->addr); 187 | cursor->depth = 0; 188 | 189 | stack = &cursor->stack[cursor->depth++]; 190 | stack->slot->bits = keyUniqNode->dups->bits; 191 | stack->addr = keyUniqNode->dups; 192 | stack->lastFld = 0; 193 | stack->off = 0; 194 | stack->ch = -1; 195 | 196 | isDup = false; 197 | 198 | while (artNextKey(dbCursor, map) == DB_OK) 199 | if ((isDup = (*evalFcn)(map, dbCursor))) 200 | break; 201 | 202 | unlockLatch(uniq->latch); 203 | return isDup ? DB_ERROR_unique_key_constraint : DB_OK; 204 | } 205 | 206 | bool evalUniq(DbMap *map, ARTKeyUniq *keyUniqNode, UniqCbFcn *evalFcn) { 207 | uint8_t area[sizeof(ArtCursor)]; 208 | DbCursor *dbCursor; 209 | CursorStack* stack; 210 | ArtCursor *cursor; 211 | 212 | // prepare cursor to enumerate uniq keys 213 | 214 | dbCursor = (DbCursor *)(area); 215 | memset(dbCursor, 0, sizeof(DbCursor)); 216 | 217 | dbCursor->state = CursorPosAt; 218 | 219 | cursor = (ArtCursor *)dbCursor; 220 | memset(cursor, 0, offsetof(ArtCursor, key)); 221 | dbCursor->key = cursor->key; 222 | 223 | stack = &cursor->stack[cursor->depth++]; 224 | stack->slot->bits = keyUniqNode->dups->bits; 225 | stack->addr = keyUniqNode->dups; 226 | stack->lastFld = 0; 227 | stack->off = 0; 228 | stack->ch = -1; 229 | 230 | while (artNextKey(dbCursor, map) == DB_OK) 231 | if ((*evalFcn)(map, dbCursor)) 232 | return false; 233 | 234 | return true; 235 | } */ 236 | -------------------------------------------------------------------------------- /arxiv.1009.2764.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/malbrain/database/c27d144df8e766cebc33830e58807951401071d7/arxiv.1009.2764.pdf -------------------------------------------------------------------------------- /base64.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _DEFAULT_SOURCE 4 | #define _DEFAULT_SOURCE 1 5 | #endif 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #ifndef _WIN32 21 | #include 22 | #include 23 | #endif 24 | 25 | #define MIN(a,b) (((a)<(b))?(a):(b)) 26 | 27 | #ifdef _WIN32 28 | #define WIN32_LEAN_AND_MEAN 29 | #include 30 | #endif 31 | 32 | typedef enum { 33 | prngRandom, // pure randomness all streams unique 34 | prngProcess, // all streams are the same 35 | prngThread // each thread has own stream, repeats across processes 36 | } PRNG; 37 | 38 | void mynrand48seed(uint16_t* nrandState, PRNG prng, uint16_t init); 39 | 40 | int createB64(uint8_t* key, int size, unsigned short next[3]); 41 | 42 | // assemble binary values after key bytes 43 | 44 | uint32_t append64(uint8_t *keyDest, int64_t *keyValues, uint8_t max, uint32_t avail); 45 | 46 | // fill in top down valueS from keyend 47 | // return number of values 48 | 49 | uint8_t parse64(uint8_t *sourceKey, int64_t *keyValues, uint8_t max); 50 | 51 | // return 64 bit suffix value from key 52 | 53 | uint64_t get64(uint8_t *key, uint32_t len); 54 | 55 | // calculate offset from right end of zone 56 | // and return suffix value 57 | 58 | uint64_t zone64(uint8_t* key, uint32_t len, uint32_t zone); 59 | 60 | // concatenate key with sortable 64 bit value 61 | // returns number of bytes concatenated 62 | 63 | uint32_t store64(uint8_t *key, uint32_t keyLen, int64_t value); 64 | 65 | uint32_t size64(uint8_t *key, uint32_t keyLen); 66 | uint32_t calc64 (int64_t value); 67 | 
-------------------------------------------------------------------------------- /btree1/btree1.c: -------------------------------------------------------------------------------- 1 | // btree1.c 2 | 3 | #include "btree1.h" 4 | 5 | extern uint32_t cursorSize[]; 6 | 7 | void btree1SlotClr(Btree1Page *page, int idx) { 8 | Btree1Slot *slot = (Btree1Slot *)(page + 1) + idx; 9 | 10 | slot->bits[0] = 0; 11 | slot->bits[1] = 0; 12 | } 13 | 14 | uint32_t btree1SlotMax(Btree1Page *page) { 15 | uint32_t off = page->cnt * sizeof(Btree1Slot); 16 | 17 | return off + sizeof(Btree1Page); 18 | } 19 | 20 | void btree1InitPage(Btree1Page *page, Btree1PageType type) { 21 | initLock(page->latch->readwr); 22 | initLock(page->latch->parent); 23 | initLock(page->latch->link); 24 | page->type = type; 25 | } 26 | 27 | // allocate btree1 pageId 28 | // create an empty page 29 | 30 | Btree1Page *btree1NewPage (Handle *hndl, uint8_t lvl, Btree1PageType type) { 31 | DbMap * idxMap = MapAddr(hndl); 32 | Btree1Index *btree1 = btree1index(idxMap); 33 | Btree1Page *page; 34 | ObjId pageId; 35 | uint32_t size; 36 | DbAddr *pageAddr; 37 | 38 | size = btree1->pageSize; 39 | 40 | if (type == Btree1_leafPage ) 41 | size <<= btree1->leafXtra; 42 | 43 | if( ((pageId.bits = allocObjId(idxMap)))) 44 | pageAddr = fetchIdSlot(idxMap,pageId); 45 | else 46 | return 0; 47 | 48 | if ((pageAddr->bits = allocObj(idxMap, idxMap->arena->usrFrame, type, size, true) )) 49 | page = getObj(idxMap, *pageAddr); 50 | else 51 | return 0; 52 | 53 | btree1InitPage(page, type); 54 | page->self.bits = pageAddr->bits; 55 | page->size = size; 56 | page->min = size; 57 | page->lvl = lvl; 58 | 59 | return page; 60 | } 61 | 62 | // initialize btree1 root and first page 63 | 64 | DbStatus btree1StoreSlot(Handle * hndl, uint8_t * key, uint32_t keyLen, int64_t * values, uint32_t valueCnt) 65 | { 66 | return DB_OK; 67 | } 68 | 69 | extern uint32_t librarianDensity; 70 | 71 | DbStatus btree1Init(Handle *hndl, Params *params) { 72 | DbMap 
*idxMap = MapAddr(hndl); 73 | Btree1Index *btree1 = btree1index(idxMap); 74 | Btree1Page *page; 75 | Btree1Page *root; 76 | Btree1Slot *slot; 77 | 78 | if (params[Btree1Bits].intVal > Btree1_maxbits) { 79 | fprintf(stderr, "createIndex: bits = %" PRIu64 " > max = %d\n", params[Btree1Bits].intVal, Btree1_maxbits); 80 | exit(1); 81 | } 82 | 83 | if (params[Btree1Bits].intVal < Btree1_minbits) { 84 | fprintf(stderr, "createIndex: bits = %" PRIu64 " < min = %d\n", 85 | params[Btree1Bits].intVal, Btree1_minbits); 86 | exit(1); 87 | } 88 | 89 | if (params[Btree1Bits].intVal + params[Btree1Xtra].intVal > 90 | Btree1_maxbits) { 91 | fprintf(stderr, "createIndex: bits = %" PRIu64 " + xtra = %" PRIu64" > max = %d\n", params[Btree1Bits].intVal, params[Btree1Xtra].intVal, Btree1_maxbits); exit(1); 92 | } 93 | 94 | btree1->pageSize = 1 << params[Btree1Bits].intVal; 95 | btree1->pageBits = (uint32_t)params[Btree1Bits].intVal; 96 | btree1->leafXtra = (uint32_t)params[Btree1Xtra].intVal; 97 | 98 | cursorSize[Hndl_btree1Index] += 1 << btree1->pageBits << btree1->leafXtra; 99 | 100 | // initial btree1 root & 101 | // right leaf pages 102 | 103 | if ((page = btree1NewPage(hndl, 0, Btree1_leafPage))) 104 | btree1->left = btree1->right = page->self; 105 | else 106 | return DB_ERROR_outofmemory; 107 | 108 | // set up the tree root page with stopper key 109 | 110 | if ((root = btree1NewPage(hndl, 1, Btree1_rootPage))) 111 | btree1->root = root->self; 112 | else 113 | return DB_ERROR_outofmemory; 114 | 115 | // set up nil root stopper key for leaf page 116 | 117 | page->cnt = 1; 118 | page->act = 1; 119 | 120 | slot = slotptr(root, 1); 121 | slot->bits[0] = 0; 122 | slot->type = Btree1_stopper; 123 | slot->childId = page->self; 124 | 125 | // release index arena lock 126 | 127 | idxMap->arena->type[0] = Hndl_btree1Index; 128 | return DB_OK; 129 | } 130 | 131 | // place write, read, or parent lock on requested page_no. 
132 | 133 | void btree1LockPage(Btree1Page *page, Btree1Lock mode) { 134 | switch( mode ) { 135 | case Btree1_lockRead: 136 | readLock (page->latch->readwr); 137 | break; 138 | case Btree1_lockWrite: 139 | writeLock (page->latch->readwr); 140 | break; 141 | case Btree1_lockParent: 142 | writeLock (page->latch->parent); 143 | break; 144 | case Btree1_lockLink: 145 | writeLock (page->latch->link); 146 | break; 147 | } 148 | } 149 | 150 | void btree1UnlockPage(Btree1Page *page, Btree1Lock mode) 151 | { 152 | switch( mode ) { 153 | case Btree1_lockWrite: 154 | writeUnlock (page->latch->readwr); 155 | break; 156 | case Btree1_lockRead: 157 | readUnlock (page->latch->readwr); 158 | break; 159 | case Btree1_lockParent: 160 | writeUnlock (page->latch->parent); 161 | break; 162 | case Btree1_lockLink: 163 | writeUnlock (page->latch->link); 164 | break; 165 | } 166 | } -------------------------------------------------------------------------------- /btree1/btree1.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../base64.h" 3 | #include "../db.h" 4 | #include "../db_malloc.h" 5 | #include "../db_index.h" 6 | #include "../db_error.h" 7 | #include "../db_map.h" 8 | #include "../db_api.h" 9 | #include "../rwlock/readerwriter.h" 10 | 11 | // BTree configuration and options 12 | 13 | // #define *+ (2 + 7 + 1) 14 | #define Btree1_maxbits 29 // maximum page size in bits 15 | #define Btree1_minbits 9 // minimum page size in bits 16 | #define Btree1_minpage (1 << Btree1_minbits) // minimum page size 17 | #define Btree1_maxpage (1 << Btree1_maxbits) // maximum page size 18 | #define Btree1_keylenbits (15) 19 | #define Btree1_maxkey (1 << Btree1_keybits) // maximum key length 20 | 21 | // There are four lock types for each node in three independent sets: 22 | // 1. (set 1) ReadLock: Sharable. Read the node. Incompatible with WriteLock. 23 | // 2. (set 1) WriteLock: Exclusive. Modify the node. 
Incompatible with ReadLock and other WriteLocks. 24 | // 3. (set 2) ParentModification: Exclusive. Change the node's parent keys. Incompatible with another ParentModification. 25 | // 4. (set 3) LinkModification: Exclusive. Update of a node's left link is underway. Incompatible with another LinkModification. 26 | 27 | typedef enum { 28 | Btree1_lockRead = 1, 29 | Btree1_lockWrite = 2, 30 | Btree1_lockParent = 4, 31 | Btree1_lockLink = 8 32 | } Btree1Lock; 33 | 34 | typedef ObjId PageId; 35 | 36 | // types of btree pages/allocations 37 | 38 | typedef enum { 39 | Btree1_rootPage = 3, 40 | Btree1_interior, 41 | Btree1_leafPage, 42 | MAXBtree1Type 43 | } Btree1PageType; 44 | 45 | // index tree configuration 46 | 47 | typedef struct { 48 | DbIndex dbIndex[1]; 49 | uint32_t pageSize; 50 | uint32_t pageBits; 51 | uint32_t leafXtra; 52 | uint32_t librarianDensity;// 2 == every other key 53 | PageId root; 54 | PageId left; // leftmost page level 0 55 | PageId right; // rightmost page lvl 0 56 | } Btree1Index; 57 | 58 | // Slot types 59 | 60 | // In addition to the Unique keys that occupy slots 61 | // there are Librarian slots in the key slot array. 62 | 63 | // The Librarian slots are dead keys that 64 | // serve as filler, available to add new keys. 
65 | 66 | typedef enum { 67 | Btree1_indexed, // key was indexed 68 | Btree1_deleted, // key was deleted 69 | Btree1_librarian, // librarian slot 70 | Btree1_fenceKey, // fence key for page 71 | Btree1_stopper // stopper slot 72 | } Btree1SlotType; 73 | 74 | typedef union { 75 | uint64_t bits[2]; 76 | 77 | struct { 78 | uint32_t off : 28; // key bytes and page offset 79 | uint32_t type : 3; // Btree1SlotType of key slot 80 | uint32_t dead : 1; // key slot deleted/dea 81 | uint16_t length; // key length incluing suffix 82 | uint16_t suffix; // bytes of 64 bit suffix in key 83 | }; 84 | union { 85 | PageId childId; // page Id of next level to leaf 86 | ObjId payLoad; // leaf (level zero) page objid 87 | }; 88 | } Btree1Slot; 89 | 90 | // Btree page layout 91 | 92 | typedef struct { 93 | RWLock readwr[1]; // read/write access lock 94 | RWLock parent[1]; // posting of fence key 95 | RWLock link[1]; // left link update 96 | } LatchSet; 97 | 98 | // The page structure is immediately 99 | // followed by an array of the key slots 100 | // and key strings on this page, allocated top-down 101 | 102 | typedef struct { 103 | union { 104 | LatchSet latch[1]; // latches for this page 105 | uint8_t base[8]; // page addressing base 106 | }; 107 | uint32_t cnt; // count of keys in page 108 | uint32_t act; // count of active keys 109 | uint32_t min; // next page key end offset 110 | uint32_t size; // page size in bytes 111 | uint32_t garbage; // page garbage in bytes 112 | Btree1PageType type:4; 113 | uint8_t lvl:4; // level of page in btree 114 | uint8_t free:1; // page is unused on free chain 115 | uint8_t kill:1; // page is being deleted 116 | PageId right; // page to right 117 | PageId left; // page to left 118 | PageId self; // current page no 119 | Btree1Slot slot[1]; // slot zero for 1 based index 120 | } Btree1Page; 121 | 122 | // Page key slot definition. 123 | 124 | // Keys are marked dead, but remain on the page until 125 | // it cleanup is called. 
126 | 127 | typedef struct { 128 | DbCursor base[1]; // base object 129 | uint32_t leafSize; 130 | uint32_t slotIdx; // cursor position index 131 | Btree1Page page[]; // cursor position page buffer 132 | } Btree1Cursor; 133 | 134 | typedef struct { 135 | uint8_t *keyVal; 136 | uint32_t keyLen; 137 | uint32_t auxLen; 138 | PageId pageId; 139 | DbAddr *pageAddr; 140 | Btree1Slot *slot; 141 | Btree1Page *page; // current page Addr 142 | uint32_t slotIdx; // slot on page for key 143 | uint32_t length; 144 | } Btree1Set; 145 | 146 | // access macros 147 | 148 | #define btree1index(map) ((Btree1Index *)(map->arena + 1)) 149 | 150 | #define slotptr(page, slotidx) (page->slot + slotidx) 151 | #define keyaddr(page, keyoff) ((page->base) + keyoff) 152 | #define keyptr(page, slotidx) ((page->slot[slotidx].off + page->base)) 153 | 154 | // btree1 implementation 155 | 156 | DbStatus btree1NewCursor(DbCursor *cursor, DbMap *map); 157 | DbStatus btree1ReturnCursor(DbCursor *dbCursor, DbMap *map); 158 | 159 | DbStatus btree1LeftKey(DbCursor *cursor, DbMap *map); 160 | DbStatus btree1RightKey(DbCursor *cursor, DbMap *map); 161 | 162 | DbStatus btree1FindKey(DbCursor *dbCursor, DbMap *map, void *key, uint32_t keylen, bool onlyOne); 163 | DbStatus btree1NextKey (DbCursor *cursor, DbMap *map); 164 | DbStatus btree1PrevKey (DbCursor *cursor, DbMap *map); 165 | 166 | DbStatus btree1StoreSlot (Handle *hndl, uint8_t *key, uint32_t keyLen, int64_t *values, uint32_t valueCnt); 167 | DbStatus btree1Init(Handle *hndl, Params *params); 168 | 169 | DbStatus btree1InsertKey(Handle *index, DbKeyValue *kv, uint8_t lvl, Btree1SlotType type); 170 | 171 | DbStatus btree1DeleteKey(Handle *hndl, void *key, uint32_t keyLen); 172 | 173 | DbStatus btree1LoadPage(DbMap *map, Btree1Set *set, Btree1Lock lockMode, bool findGood, bool stopper, uint8_t lvl); 174 | 175 | DbStatus btree1SplitPage (Handle *hndl, Btree1Set *set); 176 | DbStatus btree1FixKey (Handle *index, uint8_t *fenceKey, uint64_t prev, uint64_t 
suffix, uint8_t lvl, bool stopper); 177 | DbStatus btree1InsertSfxKey(Handle *hndl, uint8_t *key, uint32_t keyLen, uint64_t suffix, uint8_t lvl, Btree1SlotType type); 178 | 179 | Btree1Page *btree1NewPage(Handle *index, uint8_t lvl, Btree1PageType type); 180 | void btree1LockPage(Btree1Page *page, Btree1Lock mode); 181 | void btree1UnlockPage(Btree1Page *page, Btree1Lock mode); 182 | 183 | int btree1KeyCmp(Btree1Page *page, uint32_t idx, uint8_t *keyVal, uint32_t keyLen); 184 | -------------------------------------------------------------------------------- /btree1/btree1_cursor.c: -------------------------------------------------------------------------------- 1 | // btree1_cursor.c 2 | 3 | #include "btree1.h" 4 | 5 | DbStatus btree1NewCursor(DbCursor *dbCursor, DbMap *idxMap) { 6 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 7 | Btree1Index *btree1 = btree1index(idxMap); 8 | 9 | cursor->leafSize = btree1->pageSize << btree1->leafXtra; 10 | cursor->slotIdx = 1; 11 | return DB_OK; 12 | } 13 | 14 | DbStatus btree1ReturnCursor(DbCursor *dbCursor, DbMap *map) { 15 | return DB_OK; 16 | } 17 | 18 | DbStatus btree1LeftKey(DbCursor *dbCursor, DbMap *map) { 19 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 20 | Btree1Index *btree1 = btree1index(map); 21 | Btree1Page *leftPage; 22 | DbAddr *leftAddr; 23 | PageId leftId; 24 | 25 | if (leftId.bits = cursor->page->left.bits) 26 | leftAddr = fetchIdSlot(map, leftId); 27 | else 28 | return DB_CURSOR_eof; 29 | 30 | leftPage = getObj(map, *leftAddr); 31 | btree1LockPage (leftPage, Btree1_lockRead); 32 | 33 | memcpy (cursor->page, leftPage, cursor->leafSize); 34 | btree1UnlockPage (leftPage, Btree1_lockRead); 35 | 36 | cursor->slotIdx = 1; 37 | return DB_OK; 38 | } 39 | 40 | DbStatus btree1RightKey(DbCursor *dbCursor, DbMap *map) { 41 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 42 | Btree1Index *btree1 = btree1index(map); 43 | Btree1Page *rightPage; 44 | DbAddr *rightAddr; 45 | PageId rightId; 46 | 47 | if 
(rightId.bits = cursor->page->right.bits) 48 | rightAddr = fetchIdSlot(map, rightId); 49 | else 50 | return DB_CURSOR_eof; 51 | 52 | rightPage = getObj(map, *rightAddr); 53 | btree1LockPage(rightPage, Btree1_lockRead); 54 | memcpy(cursor->page, rightPage, cursor->leafSize); 55 | btree1UnlockPage(rightPage, Btree1_lockRead); 56 | cursor->slotIdx = cursor->page->cnt; 57 | return DB_OK; 58 | } 59 | 60 | DbStatus btree1NextKey (DbCursor *dbCursor, DbMap *map) { 61 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 62 | Btree1Page *rightPage; 63 | DbAddr *rightAddr; 64 | PageId rightId; 65 | uint8_t *key; 66 | 67 | switch (dbCursor->state) { 68 | case CursorNone: 69 | btree1LeftKey(dbCursor, map); 70 | break; 71 | 72 | case CursorRightEof: 73 | return DB_CURSOR_eof; 74 | 75 | default: 76 | break; 77 | } 78 | 79 | while (true) { 80 | uint32_t max = cursor->page->cnt; 81 | 82 | while (cursor->slotIdx <= max) { 83 | Btree1Slot *slot = slotptr(cursor->page, cursor->slotIdx++); 84 | 85 | if (slot->dead) 86 | continue; 87 | 88 | key = keyaddr(cursor->page, slot->off); 89 | dbCursor->key = key; 90 | dbCursor->keyLen = slot->length; 91 | dbCursor->state = CursorPosAt; 92 | return DB_OK; 93 | } 94 | 95 | if (rightId.bits = cursor->page->right.bits) 96 | rightAddr = fetchIdSlot(map, rightId); 97 | else 98 | break; 99 | 100 | rightPage = getObj(map, *rightAddr); 101 | btree1LockPage(rightPage, Btree1_lockRead); 102 | memcpy(cursor->page, rightPage, cursor->leafSize); 103 | btree1UnlockPage(rightPage, Btree1_lockRead); 104 | cursor->slotIdx = 1; 105 | } 106 | 107 | dbCursor->state = CursorRightEof; 108 | return DB_CURSOR_eof; 109 | } 110 | 111 | DbStatus btree1PrevKey (DbCursor *dbCursor, DbMap *map) { 112 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 113 | Btree1Page *leftPage; 114 | DbAddr *leftAddr; 115 | PageId leftId; 116 | uint8_t *key; 117 | 118 | switch (dbCursor->state) { 119 | case CursorNone: 120 | btree1RightKey(dbCursor, map); 121 | break; 122 | 123 | case 
CursorLeftEof: 124 | return DB_CURSOR_eof; 125 | 126 | default: 127 | break; 128 | } 129 | 130 | while (true) { 131 | if (cursor->slotIdx ) { 132 | Btree1Slot *slot = slotptr(cursor->page, cursor->slotIdx--); 133 | 134 | if (slot->dead) 135 | continue; 136 | 137 | key = keyaddr(cursor->page, slot->off); 138 | dbCursor->key = key; 139 | dbCursor->keyLen = slot->length; 140 | dbCursor->state = CursorPosAt; 141 | return DB_OK; 142 | } 143 | 144 | if (leftId.bits = cursor->page->left.bits) 145 | leftAddr = fetchIdSlot(map,leftId); 146 | else 147 | break; 148 | 149 | leftPage = getObj(map, *leftAddr); 150 | btree1LockPage(leftPage, Btree1_lockRead); 151 | memcpy(cursor->page, leftPage, cursor->leafSize); 152 | btree1UnlockPage(leftPage, Btree1_lockRead); 153 | cursor->slotIdx = cursor->page->cnt; 154 | } 155 | 156 | dbCursor->state = CursorLeftEof; 157 | return DB_CURSOR_eof; 158 | } 159 | -------------------------------------------------------------------------------- /btree1/btree1_delete.c: -------------------------------------------------------------------------------- 1 | // btree1_delete.c 2 | 3 | #include "btree1.h" 4 | 5 | DbStatus btree1DeleteKey(Handle *index, void *key, uint32_t len) { 6 | return DB_OK; 7 | } 8 | 9 | // todo: adapt btree-source-code delete below 10 | 11 | #if 0 12 | // a fence key was deleted from an interiour level page 13 | // push new fence value upwards 14 | 15 | BTERR bt_fixfence (BtMgr *mgr, BtPageSet *set, uint lvl, ushort thread_no) 16 | { 17 | unsigned char leftkey[BT_keyarray], rightkey[BT_keyarray]; 18 | unsigned char value[BtId]; 19 | BtKey* ptr; 20 | uint idx; 21 | 22 | // remove the old fence value 23 | 24 | ptr = fenceptr(set->page); 25 | memcpy (rightkey, ptr, ptr->len + sizeof(BtKey)); 26 | memset (slotptr(set->page, set->page->cnt--), 0, sizeof(BtSlot)); 27 | set->page->fence = slotptr(set->page, set->page->cnt)->off; 28 | 29 | // cache new fence value 30 | 31 | ptr = fenceptr(set->page); 32 | memcpy (leftkey, ptr, ptr->len 
+ sizeof(BtKey)); 33 | 34 | bt_lockpage (BtLockParent, set->latch, thread_no, __LINE__); 35 | bt_unlockpage (BtLockWrite, set->latch, thread_no, __LINE__); 36 | 37 | // insert new (now smaller) fence key 38 | 39 | bt_putid (value, set->latch->page_no); 40 | ptr = (BtKey*)leftkey; 41 | 42 | if( bt_insertkey (mgr, ptr->key, ptr->len, lvl+1, value, BtId, Unique, thread_no) ) 43 | return mgr->err_thread = thread_no, mgr->err; 44 | 45 | // now delete old fence key 46 | 47 | ptr = (BtKey*)rightkey; 48 | 49 | if( bt_deletekey (mgr, ptr->key, ptr->len, lvl+1, thread_no) ) 50 | return mgr->err_thread = thread_no, mgr->err; 51 | 52 | bt_unlockpage (BtLockParent, set->latch, thread_no, __LINE__); 53 | bt_unpinlatch(set->latch, 1, thread_no, __LINE__); 54 | return 0; 55 | } 56 | 57 | // root has a single child 58 | // collapse a level from the tree 59 | 60 | BTERR bt_collapseroot (BtMgr *mgr, BtPageSet *root, ushort thread_no) 61 | { 62 | BtPageSet child[1]; 63 | uid page_no; 64 | BtVal *val; 65 | uint idx; 66 | 67 | // find the child entry and promote as new root contents 68 | 69 | do { 70 | for( idx = 0; idx++ < root->page->cnt; ) 71 | if( !slotptr(root->page, idx)->dead ) 72 | break; 73 | 74 | val = valptr(root->page, idx); 75 | 76 | if( val->len == BtId ) 77 | page_no = bt_getid (valptr(root->page, idx)->value); 78 | else 79 | return mgr->line = __LINE__, mgr->err_thread = thread_no, mgr->err = BTERR_struct; 80 | 81 | if( child->latch = bt_pinlatch (mgr, page_no, thread_no) ) 82 | child->page = bt_mappage (mgr, child->latch); 83 | else 84 | return mgr->err_thread = thread_no, mgr->err; 85 | 86 | bt_lockpage (BtLockDelete, child->latch, thread_no, __LINE__); 87 | bt_lockpage (BtLockWrite, child->latch, thread_no, __LINE__); 88 | 89 | memcpy (root->page, child->page, mgr->page_size); 90 | bt_freepage (mgr, child, thread_no); 91 | 92 | } while( root->page->lvl > 1 && root->page->act == 1 ); 93 | 94 | bt_unlockpage (BtLockWrite, root->latch, thread_no, __LINE__); 95 | 
bt_unpinlatch (root->latch, 1, thread_no, __LINE__); 96 | return 0; 97 | } 98 | 99 | // delete a page and manage key 100 | // call with page writelocked 101 | 102 | // returns with page unpinned 103 | // from the page pool. 104 | 105 | BTERR bt_deletepage (BtMgr *mgr, BtPageSet *set, ushort thread_no, uint lvl) 106 | { 107 | unsigned char lowerfence[BT_keyarray]; 108 | uint page_size = mgr->page_size, kill; 109 | BtPageSet right[1], temp[1]; 110 | unsigned char value[BtId]; 111 | uid page_no, right2; 112 | BtKey *ptr; 113 | 114 | if( !lvl ) 115 | page_size <<= mgr->leaf_xtra; 116 | 117 | // cache original copy of original fence key 118 | // that is going to be deleted. 119 | 120 | ptr = fenceptr(set->page); 121 | memcpy (lowerfence, ptr, ptr->len + sizeof(BtKey)); 122 | 123 | // pin and lock our right page 124 | 125 | page_no = set->page->right; 126 | 127 | if( right->latch = lvl ? bt_pinlatch (mgr, page_no, thread_no) : bt_pinleaf (mgr, page_no, thread_no) ) 128 | right->page = bt_mappage (mgr, right->latch); 129 | else 130 | return 0; 131 | 132 | bt_lockpage (BtLockWrite, right->latch, thread_no, __LINE__); 133 | 134 | if( right->page->kill || set->page->kill ) 135 | return mgr->line = __LINE__, mgr->err = BTERR_struct; 136 | 137 | // pull contents of right sibling over our empty page 138 | // preserving our left page number, and its right page number. 139 | 140 | bt_lockpage (BtLockLink, set->latch, thread_no, __LINE__); 141 | page_no = set->page->left; 142 | memcpy (set->page, right->page, page_size); 143 | set->page->left = page_no; 144 | bt_unlockpage (BtLockLink, set->latch, thread_no, __LINE__); 145 | 146 | // fix left link from far right page 147 | 148 | if( right2 = set->page->right ) { 149 | if( temp->latch = lvl ? 
bt_pinlatch (mgr, right2, thread_no) : bt_pinleaf (mgr, right2, thread_no) ) 150 | temp->page = bt_mappage (mgr, temp->latch); 151 | else 152 | return 0; 153 | 154 | bt_lockpage (BtLockAccess, temp->latch, thread_no, __LINE__); 155 | bt_lockpage(BtLockLink, temp->latch, thread_no, __LINE__); 156 | temp->page->left = set->latch->page_no; 157 | bt_unlockpage(BtLockLink, temp->latch, thread_no, __LINE__); 158 | bt_unlockpage(BtLockAccess, temp->latch, thread_no, __LINE__); 159 | bt_unpinlatch (temp->latch, 1, thread_no, __LINE__); 160 | } else if( !lvl ) { // our page is now rightmost leaf 161 | bt_mutexlock (mgr->lock); 162 | mgr->pagezero->alloc->left = set->latch->page_no; 163 | bt_releasemutex(mgr->lock); 164 | } 165 | 166 | // mark right page as being deleted and release lock 167 | 168 | right->page->kill = 1; 169 | bt_unlockpage (BtLockWrite, right->latch, thread_no, __LINE__); 170 | 171 | // redirect the new higher key directly to our new node 172 | 173 | ptr = fenceptr(set->page); 174 | bt_putid (value, set->latch->page_no); 175 | 176 | if( bt_insertkey (mgr, ptr->key, ptr->len, lvl+1, value, BtId, Update, thread_no) ) 177 | return mgr->err; 178 | 179 | // delete our original fence key in parent 180 | 181 | ptr = (BtKey *)lowerfence; 182 | 183 | if( bt_deletekey (mgr, ptr->key, ptr->len, lvl+1, thread_no) ) 184 | return mgr->err; 185 | 186 | // wait for all access to drain away with delete lock, 187 | // then obtain write lock to right node and free it. 
188 | 189 | bt_lockpage (BtLockDelete, right->latch, thread_no, __LINE__); 190 | bt_lockpage (BtLockWrite, right->latch, thread_no, __LINE__); 191 | bt_lockpage (BtLockLink, right->latch, thread_no, __LINE__); 192 | bt_freepage (mgr, right, thread_no); 193 | 194 | // release write lock to our node 195 | 196 | bt_unlockpage (BtLockWrite, set->latch, thread_no, __LINE__); 197 | bt_unpinlatch (set->latch, 1, thread_no, __LINE__); 198 | return 0; 199 | } 200 | 201 | // find and delete key on page by marking delete flag bit 202 | // if page becomes empty, delete it from the btree 203 | 204 | BTERR bt_deletekey (BtMgr *mgr, unsigned char *key, uint len, uint lvl, ushort thread_no) 205 | { 206 | uint slot, idx, found, fence; 207 | BtPageSet set[1]; 208 | BtSlot *node; 209 | BtKey *ptr; 210 | BtVal *val; 211 | 212 | if( slot = bt_loadpage (mgr, set, key, len, lvl, BtLockWrite, thread_no) ) { 213 | node = slotptr(set->page, slot); 214 | ptr = keyptr(set->page, slot); 215 | } else 216 | return mgr->err_thread = thread_no, mgr->err; 217 | 218 | // if librarian slot, advance to real slot 219 | 220 | if( node->type == Librarian ) { 221 | ptr = keyptr(set->page, ++slot); 222 | node = slotptr(set->page, slot); 223 | } 224 | 225 | fence = slot == set->page->cnt; 226 | 227 | // delete the key, ignore request if already dead 228 | 229 | if( found = !keycmp (ptr, key, len) ) 230 | if( found = node->dead == 0 ) { 231 | val = valptr(set->page,slot); 232 | set->page->garbage += ptr->len + val->len + sizeof(BtKey) + sizeof(BtVal); 233 | set->page->act--; 234 | node->dead = 1; 235 | 236 | // collapse empty slots beneath the fence 237 | // on interiour nodes 238 | 239 | if( lvl ) 240 | while( idx = set->page->cnt - 1 ) 241 | if( slotptr(set->page, idx)->dead ) { 242 | *slotptr(set->page, idx) = *slotptr(set->page, idx + 1); 243 | memset (slotptr(set->page, set->page->cnt--), 0, sizeof(BtSlot)); 244 | } else 245 | break; 246 | } 247 | 248 | if( !found ) 249 | return 0; 250 | 251 | // did we 
delete a fence key in an upper level? 252 | 253 | if( lvl && set->page->act && fence ) 254 | return bt_fixfence (mgr, set, lvl, thread_no); 255 | 256 | // do we need to collapse root? 257 | 258 | if( lvl > 1 && set->latch->page_no == ROOT_page && set->page->act == 1 ) 259 | return bt_collapseroot (mgr, set, thread_no); 260 | 261 | // delete empty page 262 | 263 | if( !set->page->act ) 264 | return bt_deletepage (mgr, set, thread_no, set->page->lvl); 265 | 266 | bt_unlockpage(BtLockWrite, set->latch, thread_no, __LINE__); 267 | bt_unpinlatch (set->latch, 1, thread_no, __LINE__); 268 | return 0; 269 | } 270 | #endif 271 | -------------------------------------------------------------------------------- /btree1/btree1_find.c: -------------------------------------------------------------------------------- 1 | // btree1_find.c 2 | 3 | #include "btree1.h" 4 | 5 | DbStatus btree1FindKey( DbCursor *dbCursor, DbMap *map, void *key, uint32_t keyLen, bool onlyOne) { 6 | Btree1Cursor *cursor = (Btree1Cursor *)dbCursor; 7 | Btree1Index *btree1 = btree1index(map); 8 | uint8_t *foundKey; 9 | Btree1Slot *slot; 10 | Btree1Set set[1]; 11 | DbStatus stat; 12 | 13 | // find the level 0 page containing the key 14 | 15 | set->keyVal = key; 16 | set->keyLen = keyLen; 17 | 18 | if ((stat = btree1LoadPage(map, set, Btree1_lockRead, true, false, 0))) 19 | return stat; 20 | 21 | slot = slotptr(set->page, set->slotIdx); 22 | 23 | if (slot->type == Btree1_stopper) { 24 | btree1UnlockPage (set->page, Btree1_lockRead); 25 | return DB_CURSOR_eof; 26 | } 27 | 28 | foundKey = keyaddr(set->page, slot->off); 29 | cursor->base->state = CursorPosAt; 30 | 31 | if (onlyOne) { 32 | memset (cursor->page, 0, sizeof(Btree1Page)); 33 | cursor->page->cnt = 2; 34 | cursor->page->act = 2; 35 | 36 | cursor->page->min = btree1->pageSize >> btree1->leafXtra; 37 | cursor->page->min -= slot->length; 38 | 39 | slotptr(cursor->page, 1)->bits[0] = cursor->page->min; 40 | slotptr(cursor->page, 2)->type = Btree1_stopper; 
41 | 42 | memcpy (keyptr(cursor->page,1), foundKey, slot->length); 43 | 44 | btree1UnlockPage(set->page, Btree1_lockRead); 45 | cursor->base->key = keyptr(cursor->page, 1); 46 | cursor->base->keyLen = slot->length; 47 | cursor->slotIdx = 1; 48 | return DB_OK; 49 | } 50 | 51 | memcpy(cursor->page, set->page, btree1->pageSize); 52 | btree1UnlockPage(set->page, Btree1_lockRead); 53 | 54 | cursor->base->key = foundKey; 55 | cursor->base->keyLen = slot->length; 56 | cursor->slotIdx = set->slotIdx; 57 | return DB_OK; 58 | } 59 | -------------------------------------------------------------------------------- /btree1/btree1_insert.c: -------------------------------------------------------------------------------- 1 | // btree_insert.c 2 | 3 | #include "btree1.h" 4 | 5 | 6 | extern bool debug; 7 | 8 | DbStatus btree1InsertKey(Handle *index, DbKeyValue *kv, uint8_t lvl, Btree1SlotType type) { 9 | DbMap *idxMap = MapAddr(index); 10 | uint32_t length; 11 | Btree1Slot *slot; 12 | Btree1Set set[1]; 13 | Btree1Page *page; 14 | int32_t max, cnt; 15 | int32_t idx, tst; 16 | DbStatus stat; 17 | uint8_t *ptr; 18 | 19 | length = kv->keyLen; 20 | 21 | while (true) { 22 | memset(set, 0, sizeof(set)); 23 | set->keyLen = kv->keyLen; 24 | set->keyVal = kv->keyBuff; 25 | set->auxLen = kv->suffixLen; 26 | set->length = length; 27 | 28 | // drill down to lvl page containing key 29 | 30 | if ((stat = btree1LoadPage(idxMap, set, Btree1_lockWrite, false, false, lvl))) 31 | return stat; 32 | 33 | // dest page overflow? 
34 | // if so, split the page 35 | 36 | if (set->page->min < sizeof(Btree1Slot) * set->page->cnt + length + sizeof(Btree1Page)) 37 | if(( stat = btree1SplitPage(index, set))) 38 | return stat; 39 | else 40 | continue; 41 | 42 | page = set->page; 43 | assert(set->slotIdx < page->cnt); 44 | slot = slotptr(set->page, set->slotIdx); 45 | 46 | if( set->slotIdx < set->page->cnt ) 47 | if(slot->type == Btree1_librarian) 48 | set->slotIdx++, slot++; 49 | 50 | // check for duplicate key already on the page 51 | 52 | if( set->slotIdx <= set->page->cnt ) 53 | if( !btree1KeyCmp(set->page, set->slotIdx, kv->keyBuff, kv->keyLen) ) 54 | return DB_ERROR_duplicatekey; 55 | 56 | // slot now points to where the new 57 | // key would be inserted when open 58 | 59 | // find nearest open `dead/librarian slot 60 | // and bubble dead to slot[0] 61 | 62 | max = set->page->cnt; 63 | tst = set->slotIdx; 64 | idx = 0; 65 | 66 | do { 67 | if( cnt = idx++ + tst < max ) { 68 | if( slot[idx].dead ) do { 69 | slot[idx].bits[0] = slot[idx-1].bits[0]; 70 | slot[idx].bits[1] = slot[idx-1].bits[1]; 71 | } while( --idx ); 72 | break; 73 | } 74 | 75 | if( idx < max && ++cnt ) { 76 | if( slot[-idx].dead ) do { 77 | slot[-idx].bits[0] = slot[-idx + 1].bits[0]; 78 | slot[-idx].bits[1] = slot[-idx + 1].bits[1]; 79 | } while( --idx ); 80 | break; 81 | } 82 | } while( cnt > 0 ); 83 | 84 | assert((uint64_t)page->min < sizeof (Btree1Slot) * (uint64_t)(page->cnt + 1) + (uint64_t)set->length + sizeof(Btree1Page)); 85 | 86 | slot->off = page->min -= set->length; 87 | 88 | // add the key with its suffix to the page 89 | 90 | ptr = keyaddr(page, page->min); 91 | page->act += 1; 92 | memcpy (ptr, kv->keyBuff, kv->keyLen); 93 | slot->type = type; 94 | } 95 | btree1UnlockPage (set->page, Btree1_lockWrite); 96 | return DB_OK; 97 | } 98 | 99 | // compare two keys, return > 0, = 0, or < 0 100 | // =0: all key fields are same 101 | // -1: key2 > key1 102 | // +1: key2 < key1 103 | 104 | int btree1KeyCmp(Btree1Page *page, 
uint32_t idx, uint8_t *keyVal, uint32_t keyLen) 105 | { 106 | Btree1Slot *slot = slotptr(page, idx); 107 | int ans; 108 | 109 | ans = memcmp(keyptr(page, idx), keyVal, MIN(slot->length, keyLen)); 110 | 111 | return ans; 112 | } 113 | 114 | // find slot in page that is .ge. given search key 115 | 116 | // return zero if past end of all slots 117 | // return slot idx for key that is .ge. passed key. 118 | 119 | uint32_t btree1FindSlot(Btree1Page *page, uint8_t *key, uint32_t keyLen) 120 | { 121 | uint32_t diff, higher = page->cnt + 1, low = 1, idx; 122 | bool good; 123 | 124 | // virtual stopper key? 125 | 126 | if ((good = !page->right.bits)) 127 | if (page->lvl) 128 | higher -= 1; 129 | 130 | // low is a candidate. 131 | // higher is already 132 | // tested as .ge. the given key. 133 | // loop ends when they meet 134 | 135 | while ((diff = higher - low)) { 136 | idx = low + diff / 2; 137 | 138 | if (btree1KeyCmp(page, idx, key, keyLen) < 0) 139 | low = idx + 1; 140 | else 141 | higher = idx, good = true; 142 | } 143 | 144 | return good ? higher : 0; 145 | } 146 | 147 | // lock and load page at given level for given key 148 | // Librarian slots have the same key offset as their higher neighbor 149 | 150 | DbStatus btree1LoadPage(DbMap * map, Btree1Set * set, Btree1Lock lock, bool findGood, bool stopper, uint8_t lvl) { 151 | Btree1Index *btree1 = btree1index(map); 152 | uint8_t drill = 0xff; 153 | Btree1Page *prevPage = NULL; 154 | Btree1Lock mode, prevMode; 155 | Btree1Slot *slot; 156 | PageId prevPageId; 157 | uint64_t bits; 158 | 159 | bits = btree1->root.bits; 160 | prevPageId.bits = 0; 161 | 162 | // start at the root level of the btree1 and drill down 163 | 164 | while ((set->pageId.bits = bits)) { 165 | 166 | // determine lock mode of drill level 167 | 168 | mode = (drill == lvl) ? 
lock : Btree1_lockRead; 169 | set->pageAddr = fetchIdSlot(map, set->pageId); 170 | set->page = getObj(map, *set->pageAddr); 171 | 172 | // release parent or left sibling page 173 | 174 | if (prevPageId.bits) { 175 | btree1UnlockPage(prevPage, prevMode); 176 | prevPageId.bits = 0; 177 | } 178 | 179 | // obtain mode lock 180 | 181 | btree1LockPage(set->page, mode); 182 | 183 | if (set->page->free) 184 | return DB_BTREE_error; 185 | 186 | // re-read and re-lock root after determining actual level of root 187 | 188 | if (set->page->lvl != drill) { 189 | assert(drill == 0xff); 190 | drill = set->page->lvl; 191 | 192 | if (lock != Btree1_lockRead && drill == lvl) { 193 | btree1UnlockPage(set->page, mode); 194 | continue; 195 | } 196 | } 197 | 198 | assert(lvl <= set->page->lvl); 199 | 200 | prevPageId.bits = set->page->self.bits; 201 | prevPage = set->page; 202 | prevMode = mode; 203 | 204 | // find key on page at this level 205 | // and descend to requested level 206 | 207 | if (set->page->kill) { 208 | bits = set->page->right.bits; 209 | continue; 210 | } 211 | 212 | // if page is empty 213 | 214 | if (set->page->cnt == 0) { 215 | set->slotIdx = 0; 216 | return DB_OK; 217 | } 218 | 219 | // find slot on page 220 | 221 | if (stopper) 222 | set->slotIdx = set->page->cnt; 223 | else 224 | set->slotIdx = btree1FindSlot(set->page, set->keyVal, set->keyLen); 225 | 226 | // slide right into next page 227 | 228 | if (!set->slotIdx) { 229 | bits = set->page->right.bits; 230 | continue; 231 | } 232 | 233 | // find next higher non-dead slot 234 | 235 | if ((drill == lvl && findGood) || drill > lvl) 236 | while (set->slotIdx < set->page->cnt) 237 | if (slotptr(set->page, set->slotIdx)->dead) 238 | set->slotIdx++; 239 | else 240 | break; 241 | 242 | if (drill == lvl) 243 | return DB_OK; 244 | 245 | // continue on next page down 246 | 247 | slot = slotptr(set->page, set->slotIdx); 248 | assert(drill > 0); 249 | drill--; 250 | } 251 | 252 | // return error on end of right chain 253 | 
254 | return DB_BTREE_error; 255 | } 256 | -------------------------------------------------------------------------------- /btree1/btree1_util.c: -------------------------------------------------------------------------------- 1 | // btree1_util.c 2 | 3 | #include "btree1.h" 4 | 5 | // debug slot function 6 | 7 | #ifndef _DEBUG 8 | Btree1Slot *btree1Slot(Btree1Page *page, uint32_t idx) 9 | { 10 | return slotptr(page, idx); 11 | } 12 | 13 | uint8_t *btree1Key(Btree1Page *page, uint32_t idx) 14 | { 15 | return keyptr(page, idx); 16 | } 17 | 18 | uint8_t *btree1Addr(Btree1Page *page, uint32_t off) 19 | { 20 | return keyaddr(page, off); 21 | } 22 | 23 | #undef keyptr 24 | #undef keyaddr 25 | #undef slotptr 26 | #define keyptr(p,x) btree1Key(p,x) 27 | #define keyaddr(p,o) btree1Addr(p,o) 28 | #define slotptr(p,x) btree1Slot(p,x) 29 | #endif 30 | 31 | uint32_t librarianDensity = 3; 32 | extern bool stats; 33 | uint32_t Splits; 34 | 35 | // function to copy keys from one page to another 36 | 37 | // move a segment of keys to a new page 38 | // idx - first source slot moved to dest (start with zero) 39 | // max - final source slot to be moved 40 | 41 | uint32_t btree1SplitCopy(Btree1Page *destPage, Btree1Page *slotPage, uint32_t idx, uint32_t max) { 42 | uint32_t librarianIdx = 0, cnt = destPage->cnt; 43 | Btree1Slot *slot, *dest; 44 | 45 | dest = slotptr(destPage, destPage->cnt); 46 | 47 | while (++idx <= max) { 48 | slot = slotptr(slotPage, idx); 49 | 50 | if (slot->dead) 51 | continue; 52 | 53 | // dest page overflow 54 | 55 | if (destPage->min < sizeof(Btree1Slot) * cnt + sizeof(Btree1Page)) 56 | return idx; 57 | 58 | // librarian slot inserts 59 | // never the highest slot index 60 | 61 | if(librarianDensity ) 62 | if(++librarianIdx % librarianDensity == 0) { 63 | dest->bits[0] = 0; 64 | dest->type = Btree1_librarian; 65 | dest->dead = true; 66 | dest++; 67 | cnt++; 68 | }; 69 | 70 | dest->off = destPage->min -= slot->length; 71 | dest->payLoad = slot->payLoad; 72 | 
dest->length = slot->length; 73 | dest->suffix = slot->suffix; 74 | dest++; 75 | cnt++; 76 | 77 | memcpy(destPage->base + dest->off, slotPage->base + slot->off, slot->length); 78 | } 79 | 80 | destPage->cnt += cnt; 81 | return idx; 82 | } 83 | 84 | // split already locked full node into two (left & right) 85 | // each with 1/2 of the keys lower (left) and higher (right) 86 | // if this was the root page, pass lower/upper to split root 87 | // return with pages unlocked. 88 | // split the page and raise the height of the btree1 89 | // call with key for smaller (left) half and right page addr. 90 | 91 | DbStatus btree1SplitPage(Handle *index, Btree1Set *set) { 92 | DbMap *idxMap = MapAddr(index); 93 | Btree1Index *btree1 = btree1index(idxMap); 94 | Btree1Page *leftPage, *rightPage, *rootPage; 95 | uint8_t lvl = set->page->lvl; 96 | uint32_t max, idx; 97 | Btree1Slot *dest, *slot; 98 | 99 | if (stats) 100 | atomicAdd32(&Splits, 1); 101 | 102 | // copy lower keys into a new empty left page 103 | 104 | if ((leftPage = btree1NewPage(index, lvl, Btree1_interior))) 105 | max = set->page->cnt; 106 | else 107 | return DB_ERROR_outofmemory; 108 | 109 | if( !(idx = btree1SplitCopy(leftPage, set->page, 0, max / 2))) 110 | return DB_ERROR_outofmemory; 111 | 112 | // construct higher (rightPage) page 113 | // from remaining half of old root (set->page) 114 | 115 | if (!(rightPage = btree1NewPage(index, lvl, Btree1_interior ))) 116 | return DB_ERROR_outofmemory; 117 | 118 | // fill lower keys (leftPage) page 119 | // from lower half of overflowing (set->page) 120 | 121 | if( !( idx = btree1SplitCopy(rightPage, set->page, idx, max))) 122 | return DB_ERROR_outofmemory; 123 | 124 | rightPage->left = leftPage->self; 125 | rightPage->right = set->page->right; 126 | 127 | leftPage->left = set->page->left; 128 | leftPage->right = rightPage->self; 129 | 130 | if(set->page->type == Btree1_rootPage){ 131 | 132 | // insert stopper key on new root page 133 | // pointing to the new right half 
page 134 | 135 | if (!(rootPage = btree1NewPage(index, lvl + 1, Btree1_rootPage))) 136 | return DB_ERROR_outofmemory; 137 | 138 | rootPage->cnt = 2; 139 | rootPage->act = 2; 140 | 141 | // newroot slot 2 constructed right pageId 142 | 143 | dest = slotptr(rootPage, 2); 144 | dest->type = Btree1_stopper; 145 | dest->payLoad.bits = rightPage->self.bits; 146 | 147 | // highest lower keys (left) on newroot 148 | // and higher keys (stopper) 149 | 150 | slot = slotptr(leftPage, leftPage->cnt); 151 | 152 | dest = slotptr(rootPage, 1); 153 | dest->type = Btree1_indexed; 154 | dest->off = leftPage->min -= slot->length; 155 | dest->length = slot->length; 156 | dest->suffix = slot->suffix; 157 | dest->payLoad.bits = leftPage->self.bits; 158 | 159 | memcpy(keyaddr(rootPage, dest->off), keyaddr(leftPage, slot->off), slot->length); 160 | 161 | // install new root, return old root 162 | btree1->root.bits = rootPage->self.bits; 163 | // 164 | //if (addSlotToFrame(idxMap, listFree(index, set->page->self.type), NULL, 165 | // set->page->self.bits)) 166 | return DB_OK; 167 | // else 168 | return DB_ERROR_outofmemory; 169 | } 170 | // todo: locks & freelist 171 | //install new root 172 | return DB_OK; 173 | } 174 | 175 | // insert left/right page fence keys 176 | 177 | // insert new fence in the parent page 178 | /* 179 | btree1LockPage (rightPage, Btree1_lockParent); 180 | btree1LockPage (set->page, Btree1_lockParent); 181 | btree1UnlockPage (set->page, Btree1_lockWrite); 182 | 183 | keyLen = keylen(leftKey); 184 | 185 | if (set->page->lvl) 186 | keyLen -= Btree1_pagenobytes; // strip off pageNo 187 | 188 | // add key for page of smaller keys to parent 189 | 190 | if ((stat = btree1InsertSfxKey(index, keystr(leftKey), keyLen, set->pageNo.bits, lvl + 1, Btree1_indexed))) 191 | return stat; 192 | 193 | // switch parent key for larger keys to new right page 194 | 195 | if( (stat = btree1FixKey(index, rightKey, set->pageNo.bits, right.bits, lvl+1, !stopper) )) 196 | return stat; 197 | 
198 | btree1UnlockPage (set->page, Btree1_lockParent); 199 | btree1UnlockPage (rightPage, Btree1_lockParent); 200 | 201 | if (addSlotToFrame(idxMap, listFree(index, addr.type), NULL, addr.bits)) 202 | return DB_OK; 203 | 204 | return DB_ERROR_outofmemory; 205 | } 206 | 207 | // check page for space available, 208 | // clean if necessary and return 209 | // false - page needs splitting 210 | // true - ok to insert 211 | 212 | DbStatus btree1CleanPage(Handle *index, Btree1Set *set) { 213 | DbMap *idxMap = MapAddr(index); 214 | Btree1Index *btree1 = btree1index(idxMap); 215 | Btree1Slot librarian, *source, *dest; 216 | uint32_t size = btree1->pageSize; 217 | Btree1Page *page = set->page; 218 | uint32_t max = page->cnt; 219 | uint32_t len, cnt, idx; 220 | uint32_t newSlot = max; 221 | Btree1PageType type; 222 | Btree1Page *frame; 223 | uint32_t totKeyLen; 224 | uint8_t *key; 225 | DbAddr addr; 226 | 227 | librarian.bits = 0; 228 | librarian.type = Btree1_librarian; 229 | librarian.dead = 1; 230 | 231 | if( !page->lvl ) { 232 | size <<= btree1->leafXtra; 233 | type = Btree1_leafPage; 234 | } else 235 | type = Btree1_interior; 236 | 237 | if( page->min >= (max+1) * sizeof(Btree1Slot) + sizeof(*page) + totKeyLen ) 238 | return DB_OK; 239 | 240 | // skip cleanup and proceed directly to split 241 | // if there's not enough garbage 242 | // to bother with. 
243 | 244 | if( page->garbage < size / 5 ) 245 | return DB_BTREE_needssplit; 246 | 247 | if( (addr.bits = allocObj(idxMap, listFree(index, type), NULL, type, size, false)) ) 248 | frame = getObj(idxMap, addr); 249 | else 250 | return DB_ERROR_outofmemory; 251 | 252 | memcpy (frame, page, size); 253 | 254 | // skip page info and set rest of page to zero 255 | 256 | memset (page+1, 0, size - sizeof(*page)); 257 | page->garbage = 0; 258 | page->act = 0; 259 | 260 | cnt = 0; 261 | idx = 0; 262 | 263 | source = slotptr(frame, cnt); 264 | dest = slotptr(page, idx); 265 | 266 | // clean up page first by 267 | // removing deleted keys 268 | 269 | while( source++, cnt++ < max ) { 270 | if( cnt == set->slotIdx ) 271 | newSlot = idx + 2; 272 | 273 | if( source->dead ) 274 | continue; 275 | 276 | // copy the active key across 277 | 278 | key = keyaddr(frame, source->off); 279 | len = keylen(key) + keypre(key); 280 | size -= len; 281 | 282 | memcpy ((uint8_t *)page + size, key, len); 283 | 284 | // make a librarian slot 285 | 286 | if (cnt < max) { 287 | (++dest)->bits = librarian.bits; 288 | ++idx; 289 | } 290 | 291 | // set up the slot 292 | 293 | (++dest)->bits = source->bits; 294 | dest->off = size; 295 | idx++; 296 | 297 | page->act++; 298 | } 299 | 300 | page->min = size; 301 | page->cnt = idx; 302 | 303 | // update insert slot index 304 | // for newly cleaned-up page 305 | 306 | set->slotIdx = newSlot; 307 | 308 | // return temporary frame 309 | 310 | addSlotToFrame(idxMap, listFree(index,addr.type), NULL, addr.bits); 311 | 312 | // see if page has enough space now, or does it still need splitting? 
313 | 314 | if( page->min >= (idx+1) * sizeof(Btree1Slot) + sizeof(*page) + totKeyLen ) 315 | return DB_OK; 316 | 317 | return DB_BTREE_needssplit; 318 | } 319 | 320 | */ 321 | -------------------------------------------------------------------------------- /btree1/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /btree2/btree2.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "btree2.h" 4 | #include "btree2_slot.h" 5 | 6 | // create an empty page 7 | 8 | uint64_t btree2NewPage (Handle *index, uint8_t lvl) { 9 | DbMap *idxMap = MapAddr(index); 10 | Btree2Index *btree2 = btree2index(idxMap); 11 | Btree2PageType type; 12 | Btree2Page *page; 13 | uint32_t size; 14 | DbAddr addr; 15 | 16 | size = btree2->pageSize; 17 | type = Btree2_interior; 18 | 19 | if (!lvl) { 20 | size <<= btree2->leafXtra; 21 | type = Btree2_leafPage; 22 | } 23 | 24 | // allocate page 25 | 26 | if ((addr.bits = allocObj(idxMap, idxMap->arena->usrFrame, type, size, true))) 27 | page = getObj(idxMap, addr); 28 | else 29 | return 0; 30 | 31 | page->alloc->nxt = (size >> btree2->skipBits) - 1; 32 | page->alloc->state = Btree2_pageactive; 33 | page->pageBits = btree2->pageBits; 34 | page->leafXtra = btree2->leafXtra; 35 | page->skipBits = btree2->skipBits; 36 | page->pageType = type; 37 | page->size = size; 38 | page->lvl = lvl; 39 | 40 | return addr.bits; 41 | } 42 | 43 | // initialize btree2 root page 44 | 45 | extern uint32_t cursorSize[]; 46 | 47 | DbStatus btree2Init(Handle *index, Params *params) { 48 | DbMap *idxMap = MapAddr(index); 49 | Btree2Index *btree2 = btree2index(idxMap); 50 | ObjId pageNo, *pageSlot; 51 | Btree2Page *page; 52 | DbAddr addr; 53 | 54 | if (params[Btree2Bits].intVal > Btree2_maxbits || params[Btree2Bits].intVal < Btree2_minbits ) { 55 | fprintf(stderr, "createIndex: bits = %" 
PRIu64 " > max = %d\n", params[Btree2Bits].intVal, Btree2_maxbits); 56 | exit(1); 57 | } 58 | 59 | if (params[Btree2Bits].intVal + params[Btree2Xtra].intVal > Btree2_maxbits || params[Btree2Bits].intVal < Btree2_minbits ) { 60 | fprintf(stderr, "createIndex: bits = %" PRIu64 " + xtra = %" PRIu64 " > max = %d\n", params[Btree2Bits].intVal, params[Btree2Xtra].intVal, Btree2_maxbits); 61 | exit(1); 62 | } 63 | 64 | btree2->pageSize = 1 << params[Btree2Bits].intVal; 65 | btree2->pageBits = (uint32_t)params[Btree2Bits].intVal; 66 | btree2->leafXtra = (uint32_t)params[Btree2Xtra].intVal; 67 | 68 | cursorSize[Hndl_btree2Index] = 1 << btree2->pageBits << btree2->leafXtra; 69 | 70 | // initial btree2 root/leaf page 71 | 72 | if ((addr.bits = btree2NewPage(index, 0))) 73 | page = getObj(idxMap, addr); 74 | else 75 | return DB_ERROR_outofmemory; 76 | 77 | if ((pageNo.bits = btree2AllocPageNo(index))) 78 | pageSlot = fetchIdSlot(idxMap, pageNo); 79 | else 80 | return DB_ERROR_outofmemory; 81 | 82 | page->pageNo.bits = pageNo.bits; 83 | page->attributes = Btree2_rootPage; 84 | pageSlot->bits = addr.bits; 85 | 86 | btree2->root.bits = pageNo.bits; 87 | btree2->right.bits = pageNo.bits; 88 | btree2->left.bits = pageNo.bits; 89 | 90 | // release arena 91 | 92 | idxMap->arena->type[0] = Hndl_btree2Index; 93 | return DB_OK; 94 | } 95 | 96 | // allocate btree2 pageNo 97 | 98 | uint64_t btree2AllocPageNo(Handle *index) { 99 | return allocObjId(MapAddr(index)); 100 | } 101 | 102 | bool btree2RecyclePage(Handle *index, int type, DbAddr addr) { 103 | DbMap *idxMap = MapAddr(index); 104 | Btree2Index *btree2 = btree2index(idxMap); 105 | 106 | return addSlotToFrame(idxMap, idxMap->arena->usrFrame[type].headFrame, addr.bits); 107 | } -------------------------------------------------------------------------------- /btree2/btree2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../base64.h" 3 | #include "../db.h" 4 | #include 
"../db_object.h" 5 | #include "../db_handle.h" 6 | #include "../db_arena.h" 7 | #include "../db_map.h" 8 | #include "../db_api.h" 9 | #include "../db_cursor.h" 10 | #include "../db_frame.h" 11 | #include "../rwlock/readerwriter.h" 12 | 13 | #define Btree2_pagenobytes (2 + 7 + 1) 14 | #define Btree2_maxkey 4096 // max key size 15 | #define Btree2_maxtower 16 // max height of skip tower 16 | #define Btree2_maxslots 65536 // max skip entries 17 | #define Btree2_maxbits 29 // maximum page size in bits 18 | #define Btree2_minbits 9 // minimum page size in bits 19 | #define Btree2_minpage (1 << Btree2_minbits) // minimum page size 20 | #define Btree2_maxpage (1 << Btree2_maxbits) // maximum page size 21 | 22 | // tower slot status values 23 | 24 | typedef enum { 25 | TowerSlotEmpty = 0, 26 | TowerHeadSlot, 27 | TowerSlotOff 28 | } Btree2TowerSlot; 29 | 30 | // types of btree pages/allocations 31 | 32 | typedef enum { 33 | Btree2_pageNo = 1, 34 | Btree2_leafPage = 2, 35 | Btree2_interior = 3, 36 | MAXBtree2Type, 37 | } Btree2PageType; 38 | 39 | 40 | 41 | // page Attributes 42 | 43 | typedef enum { 44 | Btree2_rootPage = 0x10, 45 | } Btree2PageAttribute; 46 | 47 | // Btree2Index global data on disk after Arena 48 | 49 | typedef struct { 50 | DbIndex dbIndex[1]; 51 | uint32_t pageSize; 52 | uint8_t pageBits; 53 | uint8_t leafXtra; 54 | uint8_t skipBits; // unit size for skip list entries 55 | ObjId root; 56 | ObjId left; 57 | ObjId right; 58 | } Btree2Index; 59 | 60 | // Btree2 page layout 61 | 62 | typedef enum { 63 | Btree2_pageempty = 0, 64 | Btree2_pageactive= 1, // page is live 65 | Btree2_pageclean = 2, // page being redone or split 66 | Btree2_pageleft = 4, // page is leftmost 67 | Btree2_pageright = 8, // page is rightmost 68 | } Btree2PageState; 69 | 70 | // This structure is immediately 71 | // followed by the key slots 72 | 73 | typedef struct { 74 | union Btree2Alloc { 75 | struct { 76 | uint8_t state; 77 | uint8_t filler; 78 | uint16_t nxt; // next skip list 
storage unit 79 | }; 80 | Btree2PageState disp:8; 81 | uint8_t bytes[4]; 82 | uint32_t word[1]; 83 | } alloc[1]; 84 | uint32_t size; // page size 85 | uint16_t lFence, rFence;// fence slot offsets in skip units 86 | uint16_t garbage[1];// page garbage in skip units 87 | uint8_t attributes; // page attributes 88 | uint8_t height; // height of skip list 89 | uint8_t lvl; // level of page 90 | uint8_t pageBits; 91 | uint8_t leafXtra; 92 | uint8_t skipBits; // unit size for skip list allocations 93 | uint8_t pageType; // allocation type 94 | DbAddr newPage; // replacement page 95 | ObjId stopper; // page down chain of right-most pages 96 | ObjId pageNo; // page number 97 | ObjId right; // page number to right 98 | ObjId left; // page numberto left 99 | uint8_t bitLatch[Btree2_maxtower / 8]; 100 | uint16_t towerHead[Btree2_maxtower]; 101 | } Btree2Page; 102 | 103 | // Slot types 104 | 105 | typedef enum { 106 | Btree2_slotunused, // slot unused 107 | Btree2_slotactive, // slot active 108 | Btree2_slotmoved, // slot copied into new page version 109 | Btree2_slotdeleted, // slot deleted 110 | } Btree2SlotState; 111 | 112 | // Page key slot definition. 
113 | // tower and key bytes follow these fields 114 | 115 | typedef struct { 116 | union { 117 | uint8_t keyBase[2]; 118 | uint16_t keyLen; 119 | }; 120 | uint8_t state[1]; 121 | uint8_t height; // final tower height 122 | uint8_t bitLatch[Btree2_maxtower / 8]; 123 | uint16_t tower[]; // skip-list tower 124 | } Btree2Slot; 125 | 126 | typedef struct { 127 | uint8_t rootLvl; // last discovered root level 128 | ObjId pageNo; // current page Number 129 | DbAddr pageAddr; // current page address 130 | Btree2Page *page; // current page content 131 | uint16_t found, off, next; // offset of new and next slot 132 | uint16_t prevOff[Btree2_maxtower]; 133 | } Btree2Set; 134 | 135 | typedef struct { 136 | DbCursor base[1]; // base object 137 | Btree2Page *page; // cursor position page buffer 138 | DbAddr pageAddr; // current page address 139 | uint32_t pageSize; // size of cursor page buffer 140 | uint16_t listIdx; // cursor position idx 141 | uint16_t listMax; // cursor position max 142 | uint16_t listFwd[Btree2_maxslots]; 143 | } Btree2Cursor; 144 | 145 | typedef struct { 146 | uint32_t lcgState[1]; // Lehmer's RNG state 147 | uint16_t nrandState[3]; // random number generator state 148 | } Btree2HandleXtra; 149 | 150 | #define btree2index(map) ((Btree2Index *)(map->arena + 1)) 151 | #define btree2HandleXtra(handle) ((Btree2HandleXtra *)(handle + 1)) 152 | 153 | DbStatus btree2NewCursor(DbCursor *cursor, DbMap *map); 154 | DbStatus btree2ReturnCursor(DbCursor *dbCursor, DbMap *map); 155 | 156 | DbStatus btree2LeftKey(DbCursor *cursor, DbMap *map); 157 | DbStatus btree2RightKey(DbCursor *cursor, DbMap *map); 158 | 159 | DbStatus btree2FindKey(DbCursor *cursor, DbMap *map, uint8_t *key, uint32_t keylen, bool onlyOne); 160 | DbStatus btree2NextKey (DbCursor *cursor, DbMap *map); 161 | DbStatus btree2PrevKey (DbCursor *cursor, DbMap *map); 162 | 163 | DbStatus btree2Init(Handle *hndl, Params *params); 164 | DbStatus btree2InsertKey(Handle *hndl, DbKeyValue *kv, uint8_t lvl, 
Btree2SlotState state); 165 | DbStatus btree2DeleteKey(Handle *hndl, uint8_t *key, uint32_t keyLen); 166 | 167 | uint16_t btree2LoadPage(DbMap *map, Btree2Set *set, uint8_t *key, uint32_t keyLen, uint8_t lvl); 168 | uint64_t btree2NewPage (Handle *hndl, uint8_t lvl); 169 | 170 | DbStatus btree2CleanPage(Handle *hndl, Btree2Set *set); 171 | DbStatus btree2SplitPage (Handle *hndl, Btree2Set *set); 172 | DbStatus btree2InstallKey(Btree2Set *set, uint8_t *key, uint32_t keyLen, uint8_t height); 173 | 174 | int btree2KeyCmp(uint8_t *key1, uint8_t *key2, uint32_t len1, uint32_t len2); 175 | void btree2FindSlot(Btree2Set *set, uint8_t *key, uint32_t keyLen); 176 | uint64_t btree2AllocPageNo(Handle *index); 177 | uint64_t btree2Get64 (Btree2Slot *slot); 178 | uint32_t btree2Store64(Btree2Slot *slot, uint64_t value); 179 | uint16_t btree2AllocSlot(Btree2Page *page, uint32_t bytes); 180 | uint16_t btree2FillFwd(Btree2Cursor *cursor, Btree2Page *page, uint16_t findOff, uint32_t pageSize); 181 | uint32_t btree2SlotSize(Btree2Slot *slot, uint8_t skipBits, uint8_t height); 182 | uint32_t btree2SizeSlot(uint32_t keyLen, uint8_t height); 183 | uint32_t btree2GenHeight(Handle *index); 184 | bool btree2RecyclePage(Handle *index, int type, DbAddr page); 185 | bool btree2SkipDead(Btree2Set *set); 186 | bool btree2DeadTower(Btree2Set *set); 187 | -------------------------------------------------------------------------------- /btree2/btree2_cursor.c: -------------------------------------------------------------------------------- 1 | #include "btree2.h" 2 | #include "btree2_slot.h" 3 | 4 | // strip key ordering into cursor listFwd 5 | // index zero is left EOF, index 1 is first key, 6 | // index listMax is last occupied index, 7 | // index listMax + 1 is right EOF. 
8 | 9 | uint16_t btree2FillFwd(Btree2Cursor *cursor, Btree2Page *page, uint16_t findOff, uint32_t pageSize) { 10 | uint16_t off, foundIdx = 0; 11 | Btree2Slot *slot; 12 | 13 | memcpy (cursor->page, page, pageSize); 14 | off = cursor->page->towerHead[0]; 15 | cursor->listMax = 0; 16 | 17 | while( off ) { 18 | slot = slotptr(cursor->page, off); 19 | 20 | if( *slot->state == Btree2_slotactive) 21 | cursor->listFwd[++cursor->listMax] = off; 22 | 23 | if( off == findOff ) 24 | foundIdx = cursor->listMax; 25 | 26 | off = slot->tower[0]; 27 | } 28 | 29 | return foundIdx; 30 | } 31 | 32 | DbStatus btree2NewCursor(DbCursor *dbCursor, DbMap *map) { 33 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 34 | Btree2Index *btree2 = btree2index(map); 35 | uint32_t size; 36 | 37 | // allocate cursor page buffer 38 | 39 | size = btree2->pageSize << btree2->leafXtra; 40 | cursor->pageSize = size; 41 | 42 | cursor->pageAddr.bits = db_rawAlloc(size, false); 43 | cursor->page = db_rawObj(cursor->pageAddr); 44 | cursor->listIdx = 0; 45 | return DB_OK; 46 | } 47 | 48 | DbStatus btree2ReturnCursor(DbCursor *dbCursor, DbMap *map) { 49 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 50 | 51 | // return cursor page buffer 52 | 53 | db_memFree(cursor->pageAddr); 54 | return DB_OK; 55 | } 56 | 57 | DbStatus btree2LeftKey(DbCursor *dbCursor, DbMap *map) { 58 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 59 | Btree2Index *btree2 = btree2index(map); 60 | DbAddr *pageNoPtr; 61 | uint32_t pageSize; 62 | Btree2Page *left; 63 | 64 | pageNoPtr = fetchIdSlot (map, btree2->left); 65 | left = getObj(map, *pageNoPtr); 66 | 67 | pageSize = 1 << (left->pageBits + left->leafXtra); 68 | 69 | if( cursor->pageSize < pageSize ) 70 | return DB_ERROR_cursoroverflow; 71 | 72 | btree2FillFwd(cursor, left, 0, pageSize); 73 | cursor->listIdx = 0; 74 | return DB_OK; 75 | } 76 | 77 | DbStatus btree2RightKey(DbCursor *dbCursor, DbMap *map) { 78 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 79 | Btree2Index 
*btree2 = btree2index(map); 80 | DbAddr *pageNoPtr; 81 | uint32_t pageSize; 82 | Btree2Page *right; 83 | 84 | pageNoPtr = fetchIdSlot (map, btree2->right); 85 | right = getObj(map, *pageNoPtr); 86 | 87 | pageSize = 1 << (right->pageBits + right->leafXtra); 88 | 89 | if( cursor->pageSize < pageSize ) 90 | return DB_ERROR_cursoroverflow; 91 | 92 | btree2FillFwd(cursor, right, 0, pageSize); 93 | cursor->listIdx = cursor->listMax + 1; 94 | return DB_OK; 95 | } 96 | 97 | DbStatus btree2NextKey (DbCursor *dbCursor, DbMap *map) { 98 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 99 | DbAddr *pageNoPtr; 100 | Btree2Page *right; 101 | uint32_t pageSize; 102 | Btree2Slot *slot; 103 | uint16_t off; 104 | uint8_t *key; 105 | 106 | switch (dbCursor->state) { 107 | case CursorNone: 108 | btree2LeftKey(dbCursor, map); 109 | break; 110 | 111 | case CursorRightEof: 112 | return DB_CURSOR_eof; 113 | 114 | default: 115 | break; 116 | } 117 | 118 | while (true) { 119 | while( cursor->listIdx < cursor->listMax ) { 120 | off = cursor->listFwd[++cursor->listIdx]; 121 | slot = slotptr(cursor->page, off); 122 | 123 | if( *slot->state == Btree2_slotactive ) 124 | key = slotkey(slot); 125 | else 126 | continue; 127 | 128 | dbCursor->key = slotkey(slot); 129 | dbCursor->keyLen = slot->keyLen; 130 | dbCursor->state = CursorPosAt; 131 | return DB_OK; 132 | } 133 | 134 | if (cursor->page->right.bits) { 135 | pageNoPtr = fetchIdSlot (map, cursor->page->right); 136 | right = getObj(map, *pageNoPtr); 137 | } else 138 | break; 139 | 140 | pageSize = 1 << (right->pageBits + right->leafXtra); 141 | 142 | if( cursor->pageSize < pageSize ) 143 | return DB_ERROR_cursoroverflow; 144 | 145 | btree2FillFwd(cursor, right, 0, pageSize); 146 | cursor->listIdx = 0; 147 | } 148 | 149 | dbCursor->state = CursorRightEof; 150 | return DB_CURSOR_eof; 151 | } 152 | 153 | DbStatus btree2PrevKey (DbCursor *dbCursor, DbMap *map) { 154 | Btree2Cursor *cursor = (Btree2Cursor *)dbCursor; 155 | DbAddr *pageNoPtr; 156 | 
uint32_t pageSize; 157 | Btree2Page *left; 158 | Btree2Slot *slot; 159 | uint16_t off; 160 | uint8_t *key; 161 | 162 | switch (dbCursor->state) { 163 | case CursorNone: 164 | btree2RightKey(dbCursor, map); 165 | break; 166 | 167 | case CursorLeftEof: 168 | return DB_CURSOR_eof; 169 | 170 | default: 171 | break; 172 | } 173 | 174 | while (true) { 175 | if (cursor->listIdx > 1) { 176 | off = cursor->listFwd[--cursor->listIdx]; 177 | slot = slotptr(cursor->page, off); 178 | 179 | if( *slot->state == Btree2_slotactive ) 180 | key = slotkey(slot); 181 | else 182 | continue; 183 | 184 | dbCursor->key = slotkey(slot); 185 | dbCursor->keyLen = slot->keyLen; 186 | dbCursor->state = CursorPosAt; 187 | return DB_OK; 188 | } 189 | 190 | if (cursor->page->left.bits) { 191 | pageNoPtr = fetchIdSlot (map, cursor->page->left); 192 | left = getObj(map, *pageNoPtr); 193 | } else 194 | break; 195 | 196 | pageSize = 1 << (left->pageBits + left->leafXtra); 197 | 198 | if( cursor->pageSize < pageSize ) 199 | return DB_ERROR_cursoroverflow; 200 | 201 | btree2FillFwd(cursor, left, 0, pageSize); 202 | cursor->listIdx = cursor->listMax + 1; 203 | } 204 | 205 | dbCursor->state = CursorLeftEof; 206 | return DB_CURSOR_eof; 207 | } 208 | -------------------------------------------------------------------------------- /btree2/btree2_delete.c: -------------------------------------------------------------------------------- 1 | #include "btree2.h" 2 | #include "btree2_slot.h" 3 | 4 | bool btree2DeadTower(Btree2Set *set) { 5 | return true; 6 | } 7 | 8 | DbStatus btree2DeleteKey(Handle *index, uint8_t *key, uint32_t keyLen) { 9 | DbMap *idxMap = MapAddr(index); 10 | Btree2Index *btree2 = btree2index(idxMap); 11 | Btree2Slot *slot; 12 | Btree2Set set[1]; 13 | uint16_t next; 14 | 15 | memset(set, 0, sizeof(set)); 16 | 17 | // find the level 0 page containing the key 18 | 19 | if ((next = btree2LoadPage(idxMap, set, key, keyLen, 0))) 20 | slot = slotptr (set->page, next); 21 | else 22 | return 
// compare two keys byte-wise over their common length
// and break ties by length
//
//	key1, len1 - first key and its length in bytes
//	key2, len2 - second key and its length in bytes
//
// returns the memcmp result when the common prefixes differ,
// otherwise +1 when key1 is longer, -1 when key1 is shorter,
// and 0 when both keys are identical.

int btree2KeyCmp (uint8_t *key1, uint8_t *key2, uint32_t len1, uint32_t len2) {
	uint32_t common = len1 < len2 ? len1 : len2;
	int diff = memcmp (key1, key2, common);

	// first differing byte decides

	if( diff )
		return diff;

	// equal prefixes: the longer key sorts higher

	return (len1 > len2) - (len1 < len2);
}
next key, go down 80 | break; 81 | 82 | // to find a larger candidate, go right in tower 83 | 84 | towerOff = tower[idx]; 85 | tower = slot->tower; 86 | } while( true ); 87 | 88 | if( targetLvl ) 89 | return towerOff; 90 | 91 | // The key is .lt. every key in the page towerHead vector 92 | 93 | if( towerOff == TowerHeadSlot ) { 94 | if( set->page->left.bits ) { 95 | Btree2Slot *lFenceSlot = slotptr (set->page, set->page->lFence); // test left fence 96 | int fResult = btree2KeyCmp (slotkey(lFenceSlot), key, lFenceSlot->keyLen, keyLen); 97 | if( fResult >= 0 ) { 98 | set->pageNo.bits = set->page->left.bits; 99 | continue; 100 | } 101 | } 102 | } 103 | 104 | if( set->next == 0 ) { 105 | if( set->page->stopper.bits ) { 106 | set->pageNo.bits = set->page->stopper.bits; 107 | continue; 108 | } 109 | if( set->page->right.bits ) { 110 | set->pageNo.bits = set->page->right.bits; 111 | continue; 112 | } 113 | } 114 | 115 | // otherwise follow slot that is .ge. the search key 116 | 117 | set->pageNo.bits = btree2Get64 (slot); 118 | 119 | } while( set->pageNo.bits ); 120 | 121 | return DB_BTREE_error; 122 | } 123 | 124 | // find next non-dead slot -- the fence key if nothing else 125 | /* 126 | bool btree2SkipDead (Btree2Set *set) { 127 | Btree2Slot *slot = slotptr(set->page, set->prev); 128 | 129 | while( *slot->state == Btree2_slotdeleted ) { 130 | set->prevOff[0] = set->prev; 131 | if( (set->prev = slot->tower[0]) ) // successor offset 132 | slot = slotptr(set->page, set->prev); 133 | else 134 | return 0; 135 | } 136 | 137 | return 1; 138 | }*/ 139 | -------------------------------------------------------------------------------- /btree2/btree2_slot.h: -------------------------------------------------------------------------------- 1 | // macroes for slot access 2 | 3 | #define slotkey(slot) (slot->keyBase + sizeof(Btree2Slot) + slot->height * sizeof(uint16_t)) 4 | 5 | #define slotptr(page, off) (off ? 
(Btree2Slot *)((uint8_t *)page + (off << page->skipBits)) : db_abort(off > 0, "slot specified with zero offset", NULL)) 6 | -------------------------------------------------------------------------------- /btree2/btree2_util.c: -------------------------------------------------------------------------------- 1 | #include "btree2.h" 2 | #include "btree2_slot.h" 3 | #include 4 | 5 | // debug slot function 6 | 7 | #ifdef DEBUG 8 | Btree2Slot *btree2Slot(Btree2Page *page, uint32_t off) 9 | { 10 | return slotptr(page, off); 11 | } 12 | 13 | uint8_t *btree2Key(Btree2Slot *slot) 14 | { 15 | return slotkey(slot); 16 | } 17 | 18 | #undef slotkey 19 | #undef slotptr 20 | #define slotkey(s) btree2Key(s) 21 | #define slotptr(p,x) btree2Slot(p,x) 22 | #endif 23 | 24 | // calc size of slot 25 | 26 | uint32_t btree2SlotSize(Btree2Slot *slot, uint8_t skipBits, uint8_t height) 27 | { 28 | uint8_t *key = slotkey(slot); 29 | uint32_t size; 30 | 31 | size = sizeof(*slot) + slot->keyLen; 32 | size += (height ? 
height : slot->height) * sizeof(uint16_t); 33 | return size; 34 | } 35 | 36 | uint32_t lcg_parkmiller(uint32_t *state); 37 | 38 | // generate slot tower height (1-15) 39 | // w/frequency 1/2 down to 1/65536 40 | 41 | 42 | uint32_t btree2GenHeight(Handle *index) { 43 | Btree2HandleXtra *hndlXtra = ((Btree2HandleXtra *)(index + 1)); 44 | uint32_t nrand32 = mynrand48(hndlXtra->nrandState); 45 | // uint32_t nrand32 = lcg_parkmiller(index->lcgState); 46 | 47 | nrand32 |= 0x10000; 48 | 49 | #ifdef _WIN32 50 | return __lzcnt(nrand32); 51 | #else 52 | return __builtin_clz(nrand32); 53 | #endif 54 | } 55 | 56 | // calculate amount of space needed to install slot in page 57 | // include key length, docId, and tower height 58 | 59 | uint32_t btree2SizeSlot (uint32_t keySize, uint8_t height) 60 | { 61 | uint32_t amt = (uint16_t)(sizeof(Btree2Slot) + height * sizeof(uint16_t) + keySize); 62 | 63 | return amt; 64 | } 65 | 66 | // allocate space for new slot (in skipBits units); returns the new slot offset, or zero when the page lacks room or is no longer active 67 | 68 | uint16_t btree2AllocSlot(Btree2Page *page, uint32_t bytes) { 69 | uint16_t base = (sizeof(*page) + (1ULL << page->skipBits) - 1) >> page->skipBits; 70 | uint16_t size = (uint16_t)((bytes + (1ULL << page->skipBits) - 1) >> page->skipBits); // round up and convert to units before narrowing to 16 bits 71 | union Btree2Alloc alloc[1], before[1]; 72 | 73 | do { 74 | *before->word = *page->alloc->word; 75 | *alloc->word = *before->word; 76 | 77 | if( alloc->nxt > base + size ) 78 | if( alloc->state == Btree2_pageactive ) 79 | alloc->nxt -= size; 80 | else 81 | return 0; 82 | else 83 | return 0; 84 | 85 | } while( !atomicCAS32(page->alloc->word, *before->word, *alloc->word) ); 86 | 87 | return alloc->nxt; 88 | } 89 | 90 | uint64_t btree2Get64 (Btree2Slot *slot) { 91 | uint8_t *key = slotkey(slot); 92 | 93 | return get64 (key, slot->keyLen); 94 | } 95 | 96 | uint32_t btree2Store64 (Btree2Slot *slot, uint64_t value) { 97 | uint8_t *key = slotkey(slot); 98 | 99 | return store64(key, slot->keyLen, value); 100 | } 101 | 
-------------------------------------------------------------------------------- /btree2/readme.md: -------------------------------------------------------------------------------- 1 | This is an advanced btree implementation that doesn't use page locks. The keys on each page are stored by their allocated page offsets and organized into a lazy skip list. Each page is identified by its logical page number (an objectID), which is indirected through the objectID slot to retrieve the page's physical address. When a page fills, two new pages are split from the original full page, and their logical page numbers and fence keys are installed in the next higher level, closer to the root. The original full page is then retired. 2 | 3 | Each variable-length key on the page includes a random-height skip tower with next key offsets. Level zero of the towers forms a sorted list of the keys. New keys are spliced into this list with CAS, and are incorporated into the tower levels by subsequent descent operations. Deleted keys are marked deleted and removed from the tower levels by subsequent descent operations. 
4 | -------------------------------------------------------------------------------- /build: -------------------------------------------------------------------------------- 1 | gcc -std=gnu99 -O0 -g -o standalone db*.c base64.c -D RDTSC mvcc_db*.c artree/*.c btree1/*.c btree2/*.c standalone.c mutex/mutex.c rwlock/readerwriter.c Hi-Performance-Timestamps/timestamps.c -lpthread -latomic 2 | -------------------------------------------------------------------------------- /build.bat: -------------------------------------------------------------------------------- 1 | cl /W3 /Ox /Z7 /Oi /Fm /D RDTSC standalone.c base64.c db*.c mvcc_db*.c artree/*.c btree1/*.c btree2/*.c mutex/mutex.c rwlock/readerwriter.c Hi-Performance-Timestamps/timestamps.c setargv.obj 2 | -------------------------------------------------------------------------------- /build.osx: -------------------------------------------------------------------------------- 1 | gcc -Dunix -Dapple -std=c11 -O2 -g -o dbtest -D RDTSC base64.c db*.c artree/*.c btree1/*.c btree2/*.c standalone.c -lpthread 2 | -------------------------------------------------------------------------------- /build.wsl: -------------------------------------------------------------------------------- 1 | gcc -std=c11 -Wall -Wshadow -Wpointer-arith -Wstrict-prototypes -D WSL -O2 -ggdb -o standalone -fno-omit-frame-pointer standalone.c db*.c mvcc*.c btree1/*.c btree2/*.c artree/*.c mutex/mutex.c rwlock/readerwriter.c Hi-Performance-Timestamps/time*.c base64.c -lpthread -latomic 2 | -------------------------------------------------------------------------------- /database.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.329 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "database", "database.vcxproj", 
"{37384FBC-66EE-4A45-9ECD-4CD26FE27A17}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {37384FBC-66EE-4A45-9ECD-4CD26FE27A17}.Debug|x64.ActiveCfg = Debug|x64 15 | {37384FBC-66EE-4A45-9ECD-4CD26FE27A17}.Debug|x64.Build.0 = Debug|x64 16 | {37384FBC-66EE-4A45-9ECD-4CD26FE27A17}.Release|x64.ActiveCfg = Release|x64 17 | {37384FBC-66EE-4A45-9ECD-4CD26FE27A17}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {0A4C0B8E-4A82-4F85-84DC-5D48A04BAAEF} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /database.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | Source Files 50 | 51 | 52 | Source Files 53 | 54 | 55 | Source Files 56 | 57 | 58 | Source Files 59 | 60 | 61 | Source Files 62 | 63 | 64 | Source Files 65 | 66 | 67 | Source Files 68 | 69 | 70 | Source Files 71 | 72 | 73 | Source Files 74 | 75 | 76 | Source Files 77 | 78 | 79 | Source Files 80 | 81 | 82 | Source 
Files 83 | 84 | 85 | Source Files 86 | 87 | 88 | Source Files 89 | 90 | 91 | Source Files 92 | 93 | 94 | Source Files 95 | 96 | 97 | Source Files 98 | 99 | 100 | Source Files 101 | 102 | 103 | Source Files 104 | 105 | 106 | Source Files 107 | 108 | 109 | Source Files 110 | 111 | 112 | Source Files 113 | 114 | 115 | Source Files 116 | 117 | 118 | Source Files 119 | 120 | 121 | Source Files 122 | 123 | 124 | Source Files 125 | 126 | 127 | Source Files 128 | 129 | 130 | Source Files 131 | 132 | 133 | Source Files 134 | 135 | 136 | Source Files 137 | 138 | 139 | Source Files 140 | 141 | 142 | Source Files 143 | 144 | 145 | 146 | 147 | Header Files 148 | 149 | 150 | Header Files 151 | 152 | 153 | Header Files 154 | 155 | 156 | Header Files 157 | 158 | 159 | Header Files 160 | 161 | 162 | Header Files 163 | 164 | 165 | Header Files 166 | 167 | 168 | Header Files 169 | 170 | 171 | Header Files 172 | 173 | 174 | Header Files 175 | 176 | 177 | Header Files 178 | 179 | 180 | Header Files 181 | 182 | 183 | Header Files 184 | 185 | 186 | Header Files 187 | 188 | 189 | Header Files 190 | 191 | 192 | Header Files 193 | 194 | 195 | Header Files 196 | 197 | 198 | Header Files 199 | 200 | 201 | Header Files 202 | 203 | 204 | Header Files 205 | 206 | 207 | Header Files 208 | 209 | 210 | Header Files 211 | 212 | 213 | Header Files 214 | 215 | 216 | Header Files 217 | 218 | 219 | Header Files 220 | 221 | 222 | -------------------------------------------------------------------------------- /db.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _WIN32 4 | #include 5 | #else 6 | #define WIN32_LEAN_AND_MEAN 7 | #include 8 | #endif 9 | 10 | extern bool Btree1_stats, debug; 11 | 12 | // general object pointer 13 | 14 | typedef union { 15 | uint64_t bits; 16 | uint64_t addr:48; // address part of struct below 17 | uint64_t verNo:48; // document version number 18 | 19 | struct { 20 | uint32_t off; // 16 byte offset in segment 
21 | uint16_t seg; // slot index in arena segment array 22 | union { 23 | uint8_t step:8; 24 | uint16_t xtra[1]; // xtra bits 25 | uint8_t latch[1]; 26 | struct { 27 | uint8_t mutex :1; // mutex bit 28 | uint8_t kill :1; // kill entry 29 | uint8_t type :6; // object type 30 | union { 31 | uint8_t nbyte; // number of bytes in a span node 32 | uint8_t nslot; // number of frame slots in use 33 | uint8_t maxidx; // maximum slot index in use 34 | uint8_t firstx; // first array inUse to chk 35 | uint8_t ttype; // index transaction type 36 | uint8_t docIdx; // document key index no 37 | int8_t rbcmp; // red/black comparison 38 | }; 39 | }; 40 | }; 41 | }; 42 | } DbAddr, ObjId, DocId; 43 | 44 | #define TYPE_SHIFT (6*8 + 2) // number of bits to shift type left and zero all bits 45 | #define BYTE_SHIFT (2) // number of bits to shift type left and zero latch 46 | #define MUTEX_BIT 0x01 47 | #define KILL_BIT 0x02 48 | #define TYPE_BITS 0xFC 49 | 50 | #define ADDR_MUTEX_SET 0x0001000000000000ULL 51 | #define ADDR_KILL_SET 0x0002000000000000ULL 52 | #define ADDR_BITS 0x0000ffffffffffffULL 53 | /* 54 | typedef union { 55 | struct { 56 | uint32_t idx; // record ID in the segment 57 | uint16_t seg; // arena segment number 58 | union { 59 | uint8_t step :8; 60 | uint16_t xtra[1]; // xtra bits 61 | }; 62 | }; 63 | uint64_t addr:48; // address part of struct above 64 | uint64_t bits; 65 | } ObjId; 66 | */ 67 | #define MAX_key 1024 68 | 69 | // string /./content 70 | 71 | typedef struct { 72 | uint16_t len; 73 | uint8_t str[]; 74 | } DbString; 75 | 76 | typedef struct SkipHead_ SkipHead; 77 | typedef struct DbMap_ DbMap; 78 | 79 | // param slots 80 | 81 | typedef enum { 82 | Size = 0, // total Params structure size (int) 83 | OnDisk, // Arena resides on disk (bool) 84 | InitSize, // initial arena size (int) 85 | ObjIdSize, // size of arena ObjId array element (int) 86 | ClntSize, // Handle client area size (DbCursor, Iterator) (int) 87 | XtraSize, // Handle client extra storage (leaf 
page buffer) (int) 88 | ArenaXtra, // extra bytes in arena (DbIndex, DocStore) (int) 89 | 90 | RecordType = 10,// arena document record type: 0=raw, 1=mvcc 91 | MvccBlkSize, // initial mvcc document size 92 | 93 | IdxKeyUnique = 15, // index keys uniqueness constraint (bool) 94 | IdxKeyDeferred, // uniqueness constraints deferred to commit (bool) 95 | IdxKeyAddr, // index key definition address 96 | IdxKeySparse, 97 | IdxKeyPartial, // offset of partial document 98 | IdxKeyFlds, // store field lengths in keys (bool) 99 | IdxType, // 0 for artree, 1 & 2 for btree (int) 100 | IdxNoDocs, // stand-alone index file (bool) 101 | 102 | Btree1Bits = 25, // Btree1 page size in bits (int) 103 | Btree1Xtra, // leaf page extra bits (int) 104 | 105 | Btree2Bits = 28, // Btree2 page size in bits (int) 106 | Btree2Xtra, // leaf page extra bits (int) 107 | 108 | CursorDeDup = 30, // de-duplicate cursor results (bool) 109 | Concurrency, 110 | ResultSetSize, // # cursor keys or # iterator docs returned (int) 111 | 112 | UserParams = 40, 113 | MaxParam = 64 // count of param slots defined 114 | } ParamSlot; 115 | 116 | typedef union { 117 | uint64_t intVal; 118 | uint32_t offset; 119 | double dblVal; 120 | uint32_t wordVal; 121 | char charVal; 122 | bool boolVal; 123 | DbAddr addr; 124 | void *obj; 125 | } Params; 126 | 127 | // cursor move/positioning operations 128 | 129 | typedef enum { 130 | OpLeft = 'l', 131 | OpRight = 'r', 132 | OpNext = 'n', 133 | OpPrev = 'p', 134 | OpFind = 'f', 135 | OpOne = 'o', 136 | OpBefore = 'b', 137 | OpAfter = 'a' 138 | } CursorOp; 139 | 140 | // user's DbHandle 141 | // contains the Handle ObjId bits 142 | 143 | typedef union { 144 | ObjId hndlId; 145 | uint64_t hndlBits; 146 | } DbHandle; 147 | 148 | // DbVector definition 149 | 150 | typedef struct { 151 | uint8_t latch[1]; 152 | uint8_t type; 153 | uint16_t vecLen; 154 | uint16_t vecMax; 155 | DbAddr next, vector[1]; 156 | } DbVector; 157 | 158 | uint32_t vectorPush(DbMap*, DbVector *, DbAddr); 
159 | DbAddr *vectorFind(DbMap*, DbVector *, uint32_t); 160 | 161 | #define HandleAddr(id) fetchIdSlot(hndlMap, id) 162 | #define MapAddr(handle) (DbMap *)(db_memObj(handle->mapAddr)) 163 | #define ClntAddr(handle) getObj(hndlMap, handle->clientAddr) 164 | 165 | DbMap *hndlMap; 166 | 167 | // document header in docStore 168 | 169 | #include "db_arena.h" 170 | #include "db_index.h" 171 | #include "db_cursor.h" 172 | #include "db_map.h" 173 | #include "db_error.h" 174 | #include "db_frame.h" 175 | #include "db_api.h" 176 | #include "db_malloc.h" 177 | #include "db_error.h" 178 | #include "db_handle.h" 179 | #include "db_object.h" 180 | 181 | #include "db_object.h" 182 | #include "db_handle.h" 183 | -------------------------------------------------------------------------------- /db_api.h: -------------------------------------------------------------------------------- 1 | // database API interface 2 | 3 | #pragma once 4 | #include 5 | #include "base64.h" 6 | #include "db.h" 7 | 8 | extern DbMap *hndlMap; 9 | 10 | // document header in docStore 11 | // next hdrs in set follow, up to docMin 12 | 13 | typedef enum { 14 | VerRaw, 15 | VerMvcc 16 | } DocType; 17 | 18 | typedef struct { 19 | union { 20 | uint8_t base[4]; 21 | uint32_t refCnt[1]; 22 | }; 23 | uint32_t docSize; 24 | DocType docType:8; 25 | DbAddr keyValues; 26 | DocId docId[1]; 27 | } DbDoc; 28 | 29 | // fields in basic key 30 | // database docStore Arena extension 31 | 32 | typedef struct { 33 | uint64_t docCount[1]; // count of active document ID 34 | uint32_t blkSize; // standard new mvccDoc size 35 | uint16_t keyCnt; // number of cached keys per version 36 | DocType docType:16; // docStore raw, or under mvcc 37 | } DocStore; 38 | 39 | 40 | 41 | // Unique Key evaluation fcn 42 | 43 | typedef bool(UniqCbFcn)(DbMap *map, DbCursor *dbCursor); 44 | 45 | void initialize(void); 46 | 47 | DbStatus openDatabase(DbHandle *hndl, char *name, uint32_t len, 48 | Params *params); 49 | DbStatus openDocStore(DbHandle 
*hndl, DbHandle dbHndl, char *name, uint32_t len, Params *params); 50 | DbStatus createIndex(DbHandle *hndl, DbHandle docHndl, char *name,uint32_t len, Params *params); 51 | DbStatus cloneHandle(DbHandle *hndl, DbHandle fromHndl); 52 | DbStatus dropArena(DbHandle hndl, bool dropDefinitions); 53 | DbStatus closeHandle(DbHandle dbHndl); 54 | 55 | DbStatus createCursor(DbHandle *hndl, DbHandle idxHndl, Params *params); 56 | DbStatus closeCursor(DbHandle dbHndl); 57 | DbStatus positionCursor(DbHandle hndl, CursorOp op, void *key, uint32_t keyLen); 58 | DbStatus keyAtCursor(DbHandle hndl, DocId *docId, uint8_t **key, uint32_t *keyLen); 59 | DbStatus moveCursor(DbHandle hndl, CursorOp op); 60 | 61 | DbStatus insertKey(DbHandle hndl, DbKeyValue *kv); 62 | DbStatus deleteKey(DbHandle hndl, uint8_t *key, uint32_t len, uint64_t suffix); 63 | 64 | uint64_t arenaAlloc(DbHandle arenaHndl, uint32_t size, bool zeroit, 65 | bool dbArena); 66 | 67 | DbStatus storeDoc(DbHandle hndl, void *obj, uint32_t objSize, DocId *docId); 68 | DbStatus deleteDoc(DbHandle hndl, DocId docId); 69 | DbDoc *fetchDoc(DbHandle hndl, DocId docId); 70 | 71 | void *docStoreObj(DbAddr addr); 72 | -------------------------------------------------------------------------------- /db_arena.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db_error.h" 4 | #include "db_redblack.h" 5 | 6 | #ifdef _WIN32 7 | #define WIN32_LEAN_AND_MEAN 8 | #include 9 | #endif 10 | 11 | #define MAX_segs 1000 12 | #define MIN_segbits 17 13 | #define MIN_segsize (1ULL << MIN_segbits) 14 | #define MAX_segsize (1ULL << (32 + 4)) // 32 bit offset and 4 bit multiplier 15 | 16 | #define MAX_path 4096 17 | #define MAX_blk 49 // max arena blk size in half bits 18 | #define MAX_sys 5 // max user frame type 19 | #define MAX_usr 32 // max user frame type 20 | 21 | // disk arena segment 22 | 23 | typedef struct { 24 | uint64_t off; // file offset of segment 25 | uint64_t size; 
// size of the segment 26 | DbAddr nextObject; // next Object address 27 | uint32_t maxId; // highest object ID in use 28 | } DbSeg; 29 | 30 | // arena creation specifications 31 | // data is permanent in database arena 32 | 33 | typedef struct { 34 | uint64_t id; // our id in parent's child list 35 | uint64_t nxtVer; // next arena version when creating 36 | uint64_t childId; // highest child Id we've issued 37 | uint64_t creation; // milliseconds since 1/1/70 38 | uint32_t clntSize; // extra client space allocated in hndlMap 39 | uint32_t arenaXtra; // shared space after DbArena (DocStore, DbIndex) 40 | uint32_t objSize; // size of ObjectId array slot 41 | uint32_t xtraSize; // extra handle space after Handle 42 | uint8_t arenaType; // type of the arena 43 | uint8_t sysTypes; // number of system frame types 44 | uint8_t numTypes; // number of user node frame types 45 | uint8_t dead[1]; // arena file killed/deleted 46 | DbAddr nameTree[1]; // child arena name red/black tree 47 | DbAddr childList[1]; // array of childId to arenaDef RbAddr 48 | DbAddr hndlArray[1]; // array of handle ids for this arena 49 | Params params[MaxParam]; // parameter array for rest of object 50 | } ArenaDef; 51 | 52 | // system object types 53 | 54 | typedef enum { 55 | freeFrame = 1, // frames 56 | freeObjId, // 57 | } SYSFrame; 58 | 59 | typedef struct { 60 | DbAddr headFrame[1]; // FIFO queue head 61 | DbAddr freeFrame[1]; // available free objects 62 | DbAddr tailFrame[1]; // waiting for timestamp to expire 63 | } TriadQueue; 64 | 65 | // arena at beginning of seg zero 66 | 67 | typedef struct { 68 | DbSeg segs[MAX_segs]; // segment meta-data 69 | uint64_t lowTs, delTs, nxtTs; // low hndl ts, Incr on delete 70 | 71 | DbAddr rbAddr[1]; // address of r/b entry 72 | TriadQueue blkFrame[MAX_blk]; // system created frames of objects 1/2 bit 73 | TriadQueue usrFrame[MAX_usr]; // user object frames 74 | TriadQueue sysFrame[MAX_sys]; // system objects frames 75 | // uint64_t objCount; // 
overall number of objects 76 | uint64_t objSpace; // overall size of objects 77 | uint32_t baseSize; // client space after DbArena (DbIndex, DocStore) 78 | uint32_t objSize; // size of ObjectId array slot 79 | uint16_t currSeg; // index of highest segment 80 | uint16_t objSeg; // current segment index for ObjIds 81 | volatile uint8_t mutex[1]; // arena allocation lock/drop flag 82 | uint8_t type[1]; // arena type 83 | uint8_t filler[128]; 84 | } DbArena; 85 | 86 | // per instance arena map structure 87 | // created when map is opened 88 | 89 | struct DbMap_ { 90 | char *base[MAX_segs]; // pointers to mapped segment memory 91 | #ifndef _WIN32 92 | int hndl; // OS file handle 93 | #else 94 | HANDLE hndl; 95 | HANDLE maphndl[MAX_segs]; 96 | #endif 97 | DbArena *arena; // ptr to mapped seg zero 98 | char *arenaPath; // file database path 99 | DbMap *parent, *db; // ptr to parent and database 100 | ArenaDef *arenaDef; // database configuration 101 | DbAddr childMaps[1]; // skipList of child DbMaps 102 | RedBlack *rbEntry; // redblack entry address 103 | char *name; // inMem map name 104 | uint32_t openCnt[1]; // count of open children 105 | uint32_t objSize; // size of ObjectId array slot 106 | uint16_t pathLen; // length of arena path 107 | uint16_t numSeg; // number of mapped segments 108 | uint8_t mapMutex[1]; // segment mapping mutex 109 | uint8_t drop[1]; // arena map being killed 110 | uint8_t type[1]; // arena type 111 | }; 112 | 113 | #define skipSize(addr) (((1ULL << addr->type) - sizeof(SkipNode)) / sizeof(SkipEntry)) 114 | 115 | #define SKIP_node 15 116 | 117 | // catalog structure 118 | 119 | typedef struct { 120 | ObjId objId[65536]; 121 | } IdxSeg; 122 | 123 | typedef struct { 124 | DbAddr databases[1]; // database names in the catalog 125 | // uint32_t maxEntry[1]; 126 | // uint32_t numEntries[1]; 127 | // DbAddr segAddr[1]; // preload fractions 128 | } Catalog; 129 | 130 | Catalog *catalog; 131 | 132 | /** 133 | * open/create arenas 134 | */ 135 | 136 | 
DbMap *openMap(DbMap *parent, char *name, uint32_t nameLen, ArenaDef *arena, RedBlack *entry); 137 | DbMap *arenaRbMap(DbMap *parent, RedBlack *entry); 138 | 139 | RedBlack *procParam(DbMap *parent, char *name, int nameLen, Params *params); 140 | DbMap *initArena (DbMap *map, ArenaDef *arenaDef, char *name, uint32_t nameLen, RedBlack *rbEntry); 141 | 142 | /** 143 | * memory mapping 144 | */ 145 | 146 | void* mapMemory(DbMap *map, uint64_t offset, uint64_t size, uint32_t segNo); 147 | void unmapSeg(DbMap *map, uint32_t segNo); 148 | bool mapSeg(DbMap *map, uint32_t segNo); 149 | bool newSeg(DbMap *map, uint32_t minSize); 150 | DbStatus dropMap(DbMap *db, bool dropDefs); 151 | 152 | void getPath(DbMap *map, char *name, uint32_t nameLen, uint64_t ver); 153 | 154 | uint32_t addPath(char *path, uint32_t len, char *name, uint32_t nameLen, uint64_t ver); 155 | -------------------------------------------------------------------------------- /db_cputime.c: -------------------------------------------------------------------------------- 1 | #ifndef _WIN32 2 | #include 3 | #include 4 | #include 5 | 6 | double getCpuTime(int type) { 7 | struct rusage used[1]; 8 | struct timeval tv[1]; 9 | 10 | switch( type ) { 11 | case 0: 12 | gettimeofday(tv, NULL); 13 | return (double)tv->tv_sec + (double)tv->tv_usec / 1000000; 14 | 15 | case 1: 16 | getrusage(RUSAGE_SELF, used); 17 | return (double)used->ru_utime.tv_sec + (double)used->ru_utime.tv_usec / 1000000; 18 | 19 | case 2: 20 | getrusage(RUSAGE_SELF, used); 21 | return (double)used->ru_stime.tv_sec + (double)used->ru_stime.tv_usec / 1000000; 22 | } 23 | 24 | return 0; 25 | } 26 | 27 | #else 28 | 29 | #define WIN32_LEAN_AND_MEAN 30 | #include 31 | #include 32 | 33 | double getCpuTime(int type) { 34 | FILETIME crtime[1]; 35 | FILETIME xittime[1]; 36 | FILETIME systime[1]; 37 | FILETIME usrtime[1]; 38 | SYSTEMTIME timeconv[1]; 39 | double ans = 0; 40 | 41 | memset (timeconv, 0, sizeof(SYSTEMTIME)); 42 | 43 | switch( type ) { 44 | 
case 0: 45 | GetSystemTimeAsFileTime (xittime); 46 | FileTimeToSystemTime (xittime, timeconv); 47 | ans = (double)timeconv->wDayOfWeek * 3600 * 24; 48 | break; 49 | case 1: 50 | GetProcessTimes (GetCurrentProcess(), crtime, xittime, systime, usrtime); 51 | FileTimeToSystemTime (usrtime, timeconv); 52 | break; 53 | case 2: 54 | GetProcessTimes (GetCurrentProcess(), crtime, xittime, systime, usrtime); 55 | FileTimeToSystemTime (systime, timeconv); 56 | break; 57 | } 58 | 59 | ans += (double)timeconv->wHour * 3600; 60 | ans += (double)timeconv->wMinute * 60; 61 | ans += (double)timeconv->wSecond; 62 | ans += (double)timeconv->wMilliseconds / 1000; 63 | return ans; 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /db_cursor.c: -------------------------------------------------------------------------------- 1 | #include "btree1/btree1.h" 2 | #include "btree2/btree2.h" 3 | #include "artree/artree.h" 4 | 5 | // release cursor resources 6 | 7 | DbStatus dbCloseCursor(DbCursor *dbCursor, DbMap *map) { 8 | DbStatus stat = DB_ERROR_indextype; 9 | 10 | switch (*map->arena->type) { 11 | case Hndl_artIndex: 12 | stat = artReturnCursor(dbCursor, map); 13 | break; 14 | 15 | case Hndl_btree1Index: 16 | stat = btree1ReturnCursor(dbCursor, map); 17 | break; 18 | 19 | case Hndl_btree2Index: 20 | // stat = btree2ReturnCursor(dbCursor, map); 21 | break; 22 | } 23 | 24 | return stat; 25 | } 26 | 27 | // position cursor 28 | 29 | DbStatus dbFindKey(DbCursor *dbCursor, DbMap *map, void *key, uint32_t keyLen, CursorOp op) { 30 | DbStatus stat; 31 | 32 | switch (*map->arena->type) { 33 | case Hndl_artIndex: { 34 | if ((stat = artFindKey(dbCursor, map, key, keyLen, 0))) 35 | return stat; 36 | 37 | if (op == OpBefore) { 38 | if (dbCursor->state == CursorPosBefore) 39 | return DB_OK; 40 | else 41 | return artPrevKey(dbCursor, map); 42 | } 43 | 44 | return artNextKey(dbCursor, map); 45 | } 46 | 47 | case Hndl_btree1Index: { 48 | if 
((stat = btree1FindKey(dbCursor, map, key, keyLen, op == OpOne))) 49 | return stat; 50 | 51 | if (op == OpAfter) { 52 | if (memcmp (dbCursor->key, key, keyLen) <= 0) 53 | return btree1NextKey (dbCursor, map); 54 | else 55 | return DB_OK; 56 | } 57 | 58 | if (op == OpBefore) { 59 | if (memcmp (dbCursor->key, key, keyLen) >= 0) 60 | return btree1PrevKey (dbCursor, map); 61 | else 62 | return DB_OK; 63 | } 64 | 65 | break; 66 | } 67 | 68 | case Hndl_btree2Index: { 69 | // if ((stat = btree2FindKey(dbCursor, map, key, keyLen, op == OpOne))) 70 | stat=DB_OK; 71 | 72 | 73 | return stat; 74 | 75 | if (op == OpAfter) { 76 | if (memcmp (dbCursor->key, key, keyLen) <= 0) 77 | return btree2NextKey (dbCursor, map); 78 | else 79 | return DB_OK; 80 | } 81 | 82 | if (op == OpBefore) { 83 | if (memcmp (dbCursor->key, key, keyLen) >= 0) 84 | // return btree2PrevKey (dbCursor, map); 85 | // else 86 | return DB_OK; 87 | } 88 | 89 | break; 90 | } 91 | } 92 | 93 | return DB_OK; 94 | } 95 | 96 | // position cursor before first key 97 | 98 | DbStatus dbLeftKey(DbCursor *dbCursor, DbMap *map) { 99 | DbStatus stat = DB_OK; 100 | 101 | switch (*map->arena->type) { 102 | case Hndl_artIndex: { 103 | stat = artLeftKey(dbCursor, map); 104 | break; 105 | } 106 | 107 | case Hndl_btree1Index: { 108 | stat = btree1LeftKey(dbCursor, map); 109 | break; 110 | } 111 | 112 | case Hndl_btree2Index: { 113 | // stat = btree2LeftKey(dbCursor, map); 114 | break; 115 | } 116 | } 117 | 118 | dbCursor->state = CursorLeftEof; 119 | return stat; 120 | } 121 | 122 | // position cursor after last key 123 | 124 | DbStatus dbRightKey(DbCursor *dbCursor, DbMap *map) { 125 | DbStatus stat = DB_OK; 126 | 127 | switch (*map->arena->type) { 128 | case Hndl_artIndex: { 129 | stat = artRightKey(dbCursor, map); 130 | break; 131 | } 132 | 133 | case Hndl_btree1Index: { 134 | stat = btree1RightKey(dbCursor, map); 135 | break; 136 | } 137 | 138 | case Hndl_btree2Index: { 139 | // stat = btree2RightKey(dbCursor, map); 140 | 
break; 141 | } 142 | } 143 | 144 | if (stat) 145 | return stat; 146 | 147 | dbCursor->state = CursorRightEof; 148 | return DB_OK; 149 | } 150 | 151 | DbStatus dbNextKey(DbCursor *dbCursor, DbMap *map) { 152 | DbStatus stat; 153 | 154 | switch(*map->arena->type) { 155 | case Hndl_artIndex: 156 | stat = artNextKey (dbCursor, map); 157 | break; 158 | 159 | case Hndl_btree1Index: 160 | stat = btree1NextKey (dbCursor, map); 161 | break; 162 | 163 | case Hndl_btree2Index: 164 | // stat = btree2NextKey (dbCursor, map); 165 | stat = DB_OK; 166 | break; 167 | 168 | default: 169 | stat = DB_ERROR_indextype; 170 | break; 171 | } 172 | 173 | return stat; 174 | } 175 | 176 | DbStatus dbPrevKey(DbCursor *dbCursor, DbMap *map) { 177 | DbStatus stat; 178 | 179 | switch(*map->arena->type) { 180 | case Hndl_artIndex: 181 | stat = artPrevKey (dbCursor, map); 182 | break; 183 | 184 | case Hndl_btree1Index: 185 | stat = btree1PrevKey (dbCursor, map); 186 | break; 187 | 188 | case Hndl_btree2Index: 189 | // stat = btree2PrevKey (dbCursor, map); 190 | stat=DB_OK; 191 | break; 192 | 193 | default: 194 | stat = DB_ERROR_indextype; 195 | break; 196 | } 197 | 198 | return stat; 199 | } 200 | 201 | uint64_t dbGetDocId(DbCursor *cursor) { 202 | if (cursor->state == CursorPosAt) 203 | return get64(cursor->key, cursor->keyLen); // decode the 64 bit value carried by the key; the bare comma expression returned keyLen instead 204 | 205 | return 0; 206 | } 207 | -------------------------------------------------------------------------------- /db_cursor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // database cursor handle extension to index 4 | 5 | typedef enum { 6 | CursorNone, 7 | CursorLeftEof, 8 | CursorRightEof, 9 | CursorPosBefore, // cursor is before a key 10 | CursorPosAt // cursor is at a key 11 | } PosState; 12 | 13 | // DbCursor handle extension 14 | 15 | typedef struct { 16 | uint8_t *key; // cursor key bytes 17 | uint32_t size; // size of user data 18 | uint32_t keyLen; // cursor key length 19 | uint32_t baseLen; // cursor 
base length 20 | uint32_t suffix; // cursor suffix length 21 | uint32_t resultSet; // max size buffered keys 22 | DbAddr deDupHash; // dedup hash table 23 | PosState state; // cursor position state enum 24 | uint8_t foundKey; // cursor position found the key 25 | char binaryFlds; // index keys have fields 26 | uint8_t deDup; // cursor will deDuplicate result set 27 | } DbCursor; 28 | 29 | DbStatus dbCloseCursor(DbCursor *cursor, DbMap *map); 30 | DbStatus dbFindKey(DbCursor *cursor, DbMap *map, void *key, uint32_t keyLen, CursorOp op); 31 | DbStatus dbNextKey(DbCursor *cursor, DbMap *map); 32 | DbStatus dbPrevKey(DbCursor *cursor, DbMap *map); 33 | DbStatus dbRightKey(DbCursor *cursor, DbMap *map); 34 | DbStatus dbLeftKey(DbCursor *cursor, DbMap *map); 35 | 36 | uint64_t dbGetDocId(DbCursor *cursor); 37 | -------------------------------------------------------------------------------- /db_drop.c: -------------------------------------------------------------------------------- 1 | #include "base64.h" 2 | #include "db.h" 3 | 4 | 5 | DbMap memMap[1]; 6 | 7 | // drop an arena given its r/b entry 8 | // and recursively its children 9 | 10 | void dropArenaDef(DbMap *db, ArenaDef *arenaDef, bool dropDefs, char *path, uint32_t pathLen) { 11 | uint32_t len, count; 12 | PathStk pathStk[1]; 13 | RedBlack *entry; 14 | 15 | // drop our children 16 | 17 | lockLatch (arenaDef->nameTree->latch); 18 | 19 | // enumerate child nameTree 20 | 21 | if ((entry = rbStart (db, pathStk, arenaDef->nameTree))) do { 22 | ArenaDef *childDef = (ArenaDef *)(entry + 1); 23 | len = addPath(path, pathLen, rbkey(entry), entry->keyLen, childDef->nxtVer); 24 | 25 | atomicOr8(childDef->dead, KILL_BIT); 26 | 27 | // delete our name from parent's nameList 28 | 29 | if (dropDefs) 30 | rbDel(db, arenaDef->nameTree, entry); 31 | 32 | dropArenaDef(db, childDef, dropDefs, path, pathLen + len); 33 | } while ((entry = rbNext(db, pathStk))); 34 | 35 | path[pathLen] = 0; 36 | 37 | 
unlockLatch(arenaDef->nameTree->latch); 38 | 39 | // see if all handles have unbound 40 | 41 | lockLatch(arenaDef->hndlArray->latch); 42 | count = disableHndls(arenaDef->hndlArray); 43 | unlockLatch(arenaDef->hndlArray->latch); 44 | 45 | if (!count) 46 | deleteMap(path); 47 | } 48 | 49 | // drop an arena and all of its children 50 | // optionally, remove arenadef from parent childlist 51 | 52 | DbStatus dropMap(DbMap *map, bool dropDefs) { 53 | uint64_t id = map->arenaDef->id; 54 | char path[MAX_path]; 55 | DbMap *ourDb; 56 | 57 | // are we deleting a db from the catalog? 58 | 59 | if (*map->arena->type == Hndl_database) { 60 | ourDb = map->parent; 61 | dropDefs = false; 62 | } else 63 | ourDb = map->db; 64 | 65 | // are we already dropped? 66 | 67 | if (atomicOr8(map->drop, KILL_BIT) & KILL_BIT) 68 | return DB_ERROR_arenadropped; 69 | 70 | atomicOr8((volatile uint8_t *)map->arena->mutex, KILL_BIT); 71 | 72 | // remove id from parent's childMap list 73 | 74 | lockLatch(map->parent->childMaps->latch); 75 | // skipDel(memMap, map->parent->childMaps, id); 76 | unlockLatch(map->parent->childMaps->latch); 77 | 78 | // delete our r/b entry from parent's child nameList 79 | // or kill our name tree from our surviving arenaDef 80 | 81 | if (dropDefs) { 82 | lockLatch (map->parent->arenaDef->nameTree->latch); 83 | atomicOr8(map->arenaDef->dead, KILL_BIT); 84 | rbDel(ourDb, map->parent->arenaDef->nameTree, map->rbEntry); 85 | unlockLatch (map->parent->arenaDef->nameTree->latch); 86 | } 87 | 88 | memcpy (path, map->arenaPath, map->pathLen); 89 | 90 | dropArenaDef(map->db, map->arenaDef, dropDefs, path, map->pathLen); 91 | 92 | // when all of our children are unmapped 93 | // we can unmap ourselves 94 | 95 | if (!*map->openCnt) 96 | closeMap(map); 97 | 98 | return DB_OK; 99 | } 100 | -------------------------------------------------------------------------------- /db_error.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 
3 | #define db_abort(expr, msg, val) (fprintf(stderr, "db_abort: line:%d file:%s\nexpr:(%s) is false: %s\n", __LINE__, __FILE__, #expr, msg), abort(), val) 4 | 5 | typedef enum { 6 | DB_OK = 0, 7 | DB_START = 1000, 8 | DB_ERROR_duplicate_suffix, 9 | DB_ERROR_outofmemory, 10 | DB_ERROR_handleclosed, 11 | DB_ERROR_createdatabase, 12 | DB_ERROR_createindex, 13 | DB_ERROR_badhandle, 14 | DB_ERROR_badrecid, 15 | DB_ERROR_badobjslot, 16 | DB_ERROR_notbasever, 17 | DB_ERROR_recorddeleted, 18 | DB_ERROR_recordnotvisible, 19 | DB_ERROR_notcurrentversion, 20 | DB_ERROR_cursornotpositioned, 21 | DB_ERROR_invaliddeleterecord, 22 | DB_ERROR_cursorbasekeyerror, 23 | DB_ERROR_cursoroverflow, 24 | DB_ERROR_cursorop, 25 | DB_ERROR_writeconflict, 26 | DB_ERROR_duplicatekey, 27 | DB_ERROR_keylength, 28 | DB_ERROR_keynotfound, 29 | DB_ERROR_badtxnstep, 30 | DB_ERROR_rollbackidxkey, 31 | DB_ERROR_arena_already_closed, 32 | DB_ERROR_arenadropped, 33 | DB_ERROR_deletekey, 34 | DB_ERROR_indextype, 35 | DB_ERROR_indexnode, 36 | DB_ERROR_unique_key_constraint, 37 | DB_CURSOR_eof, 38 | DB_CURSOR_notfound, 39 | DB_CURSOR_notpositioned, 40 | DB_CURSOR_nothandle, 41 | DB_ITERATOR_eof, 42 | DB_ITERATOR_notfound, 43 | DB_ITERATOR_nothandle, 44 | DB_BTREE_needssplit, 45 | DB_BTREE_error, 46 | DB_ARTREE_error, 47 | DB_ITER_eof, 48 | DB_ERROR_pageisgarbage, 49 | DB_ERROR_txn_being_committed, 50 | DB_ERROR_txn_not_serializable, 51 | DB_ERROR_key_constraint_violation, 52 | DB_ERROR_no_visible_version, 53 | DB_ERROR_not_raw_document, 54 | MVCC_WriteConflict, 55 | MVCC_outofmemory, 56 | MVCC_NoTimestampSlots, 57 | } DbStatus; 58 | 59 | -------------------------------------------------------------------------------- /db_frame.c: -------------------------------------------------------------------------------- 1 | #include "base64.h" 2 | #include "db.h" 3 | 4 | #include "db_arena.h" 5 | #include "db_map.h" 6 | #include "db_frame.h" 7 | #include "db_handle.h" 8 | #include "db_object.h" 9 | 10 | uint64_t 
getFreeFrame(DbMap *map); 11 | uint64_t allocFrame( DbMap *map); 12 | 13 | #ifdef DEBUG 14 | extern uint64_t nodeAlloc[64]; 15 | extern uint64_t nodeFree[64]; 16 | extern uint64_t nodeWait[64]; 17 | #endif 18 | 19 | // fill in new frame with new available objects 20 | // call with free list head locked. 21 | // return false if out of memory 22 | 23 | uint32_t initObjFrame(DbMap *map, DbAddr *free, uint32_t type, uint32_t size) { 24 | uint32_t dup = FrameSlots, idx; 25 | Frame *frame; 26 | DbAddr slot; 27 | 28 | if (size * dup > 4096 * 4096) 29 | dup >>= 5; 30 | 31 | else if (size * dup > 1024 * 1024) 32 | dup >>= 3; 33 | 34 | else if (size * dup > 256 * 256) 35 | dup >>= 1; 36 | 37 | if (!(slot.bits = allocMap(map, size * dup))) 38 | return false; 39 | 40 | if (!free->addr) 41 | if (!(free->addr = allocFrame(map))) 42 | return false; 43 | 44 | free->type = FrameType; 45 | free->nslot = dup; 46 | 47 | frame = getObj(map, *free); 48 | frame->next.bits = 0; 49 | frame->prev.bits = 0; 50 | 51 | slot.type = type; 52 | 53 | for (idx = dup; idx--; ) { 54 | frame->slots[idx] = slot.bits; 55 | slot.off += size >> 4; 56 | } 57 | 58 | return dup; 59 | } 60 | 61 | // allocate frame full of empty frames for free list 62 | // call with freeFrame latched. 
63 | 64 | bool initFreeFrame (DbMap *map) { 65 | uint64_t addr = allocMap (map, sizeof(Frame) * (FrameSlots + 1)); 66 | uint32_t dup = FrameSlots; 67 | DbAddr head, slot; 68 | Frame *frame; 69 | 70 | if (!addr) 71 | return false; 72 | 73 | head.bits = addr; 74 | head.type = FrameType; 75 | head.nslot = FrameSlots; 76 | head.mutex = 1; 77 | 78 | frame = getObj(map, head); 79 | frame->next.bits = 0; 80 | frame->prev.bits = 0; 81 | 82 | while (dup--) { 83 | addr += sizeof(Frame) >> 4; 84 | slot.bits = addr; 85 | slot.type = FrameType; 86 | slot.nslot = FrameSlots; 87 | frame->slots[dup] = slot.bits; 88 | } 89 | 90 | map->arena->sysFrame[freeFrame].freeFrame->bits = head.bits; 91 | return true; 92 | } 93 | 94 | // obtain available frame 95 | 96 | uint64_t allocFrame(DbMap *map) { 97 | Frame *frame; 98 | DbAddr slot, *free; 99 | 100 | free = map->arena->sysFrame[freeFrame].freeFrame; 101 | lockLatch(free->latch); 102 | 103 | while (!(slot.bits = getFreeFrame(map))) 104 | if (!initFreeFrame (map)) { 105 | unlockLatch(map->arena->sysFrame[freeFrame].freeFrame->latch); 106 | return false; 107 | } 108 | 109 | unlockLatch(free->latch); 110 | frame = getObj(map, slot); 111 | frame->next.bits = 0; 112 | frame->prev.bits = 0; 113 | 114 | slot.type = FrameType; 115 | return slot.bits; 116 | } 117 | 118 | /* sys frame entries 119 | typedef enum { 120 | freeFrame = 1, // frames 121 | freeObjId, // 122 | } SYSFrame; 123 | */ 124 | 125 | // Add empty frame to frame free-list 126 | 127 | void returnFreeFrame(DbMap *map, DbAddr free) { 128 | Frame *frame; 129 | DbAddr *addr; 130 | 131 | addr = map->arena->sysFrame[freeFrame].freeFrame; 132 | lockLatch(addr->latch); 133 | 134 | // space in current free-list frame? 
135 | 136 | if (addr) 137 | if (addr->nslot < FrameSlots) { 138 | frame = getObj(map, *addr); 139 | frame->slots[addr->nslot++] = free.bits; 140 | unlockLatch(addr->latch); 141 | return; 142 | } 143 | 144 | // otherwise turn free into new freeFrame frame 145 | 146 | frame = getObj(map, free); 147 | frame->next.bits = addr->bits; 148 | frame->prev.bits = 0; 149 | 150 | // add free frame to freeFrame list 151 | // and remove mutex 152 | 153 | addr->bits = free.addr; 154 | } 155 | 156 | // Add value to frame 157 | 158 | bool addSlotToFrame(DbMap *map, DbAddr *queue, uint64_t value) { 159 | bool resp; 160 | 161 | // this latch covers both free and tail 162 | 163 | lockLatch(queue->latch); 164 | resp = addValuesToFrame(map, queue, &value, 1); 165 | unlockLatch(queue->latch); 166 | return resp; 167 | } 168 | 169 | // Add vector of values to free frame 170 | // call with free slot locked. 171 | 172 | bool addValuesToFrame(DbMap *map, DbAddr *queue, uint64_t *values, int count) { 173 | DbAddr slot2; 174 | Frame *frame; 175 | 176 | if (queue->addr) 177 | frame = getObj(map, *queue); 178 | else 179 | frame = NULL; 180 | 181 | while (count--) { 182 | // space in current frame? 
183 | 184 | if (queue->addr && queue->nslot < FrameSlots) { 185 | frame->slots[queue->nslot++] = values[count]; 186 | continue; 187 | } 188 | 189 | // allocate new frame and 190 | // push frame onto queue list 191 | 192 | if (!(slot2.bits = allocFrame(map))) 193 | return false; 194 | 195 | frame = getObj(map, slot2); 196 | frame->prev.bits = 0; 197 | 198 | // link new frame onto tail of wait chain 199 | 200 | if ((frame->next.bits = queue->addr)) { 201 | Frame *prevFrame = getObj(map, *queue); 202 | prevFrame->timestamp = map->arena->nxtTs; 203 | prevFrame->prev.bits = slot2.bits; 204 | prevFrame->prev.nslot = FrameSlots; 205 | } 206 | 207 | // install new frame at queue head, with lock set 208 | 209 | slot2.nslot = 1; 210 | queue->bits = slot2.bits | ADDR_MUTEX_SET; 211 | frame->slots[0] = values[count]; 212 | } 213 | 214 | return true; 215 | } 216 | 217 | // pull free frame from free list 218 | // call with freeFrame locked 219 | 220 | uint64_t getFreeFrame(DbMap *map) { 221 | uint64_t addr; 222 | Frame *frame; 223 | DbAddr *free, *tail; 224 | 225 | free = map->arena->sysFrame[freeFrame].freeFrame; 226 | tail = map->arena->sysFrame[freeFrame].tailFrame; 227 | 228 | if (!free->addr) 229 | return 0; 230 | 231 | frame = getObj(map, *free); 232 | 233 | // are there available free frames? 234 | 235 | if (free->nslot) 236 | return frame->slots[--free->nslot] & ADDR_BITS; 237 | 238 | // is there more than one freeFrame? 239 | 240 | if (!frame->next.bits) 241 | return 0; 242 | 243 | addr = free->addr; 244 | frame->next.nslot = FrameSlots; 245 | frame->next.mutex = 1; 246 | 247 | tail->bits = frame->next.bits; 248 | return addr; 249 | } 250 | 251 | // pull available node from free object frame 252 | // call with free frame list head locked. 253 | 254 | uint64_t getNodeFromFrame(DbMap *map, DbAddr* free) { 255 | while (free->addr) { 256 | Frame *frame = getObj(map, *free); 257 | 258 | // are there available free objects? 
259 | 260 | if (free->nslot) 261 | return frame->slots[--free->nslot]; 262 | 263 | // leave empty frame in place to collect 264 | // new nodes 265 | 266 | if (frame->next.addr) 267 | returnFreeFrame(map, *free); 268 | else 269 | return 0; 270 | 271 | // move the head of the free list 272 | // to the next frame 273 | 274 | free->bits = frame->next.addr | ADDR_MUTEX_SET; 275 | free->nslot = FrameSlots; 276 | } 277 | 278 | return 0; 279 | } 280 | 281 | // initialize frame of available ObjId/DocId 282 | 283 | bool initObjIdFrame(DbMap *map, DbAddr *free) { 284 | uint32_t dup, seg, off; 285 | uint64_t max; 286 | ObjId objId[1]; 287 | Frame *frame; 288 | DbAddr addr; 289 | 290 | lockLatch(map->arena->mutex); 291 | 292 | if (!(addr.bits = free->addr)) 293 | if (!(addr.bits = allocFrame(map))) { 294 | unlockLatch(map->arena->mutex); 295 | return false; 296 | } 297 | 298 | while (true) { 299 | seg = map->arena->objSeg; 300 | dup = FrameSlots; 301 | 302 | max = map->arena->segs[seg].size * 16ULL - 303 | map->arena->segs[seg].maxId * map->objSize; 304 | 305 | max -= dup * map->objSize; 306 | 307 | // does it fit? 
308 | 309 | if (map->arena->segs[seg].nextObject.off * 16ULL < max ) 310 | break; 311 | 312 | // move onto next segment 313 | 314 | if (seg < map->arena->currSeg) { 315 | map->arena->objSeg++; 316 | continue; 317 | } 318 | 319 | // build empty segment 320 | 321 | if (!newSeg(map, dup * map->objSize)) { 322 | unlockLatch(map->arena->mutex); 323 | return false; 324 | } 325 | 326 | map->arena->objSeg = map->arena->currSeg; 327 | break; 328 | } 329 | 330 | // allocate a batch of ObjIds 331 | 332 | off = map->arena->segs[map->arena->objSeg].maxId += dup; 333 | 334 | objId->bits = off; 335 | objId->seg = seg; 336 | 337 | frame = getObj(map, addr); 338 | frame->next.bits = 0; 339 | frame->prev.bits = 0; 340 | 341 | free->addr = addr.addr; 342 | free->type = FrameType; 343 | free->nslot = dup; 344 | 345 | while (dup--) { 346 | objId->off = off - dup; 347 | frame->slots[dup] = objId->bits; 348 | } 349 | 350 | unlockLatch(map->arena->mutex); 351 | return true; 352 | } 353 | 354 | -------------------------------------------------------------------------------- /db_frame.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define FrameSlots 125 // make sizeof(Frame) a multiple of 16 4 | 5 | typedef struct { 6 | DbAddr next; // next frame in queue 7 | DbAddr prev; // prev frame in queue 8 | uint64_t timestamp; // latest timestamp 9 | uint64_t slots[FrameSlots]; // array of waiting/free slots 10 | } Frame; 11 | 12 | void returnFreeFrame(DbMap *map, DbAddr slot); 13 | 14 | uint64_t getNodeFromFrame (DbMap *map, DbAddr *queue); 15 | uint32_t initObjFrame (DbMap *map, DbAddr *queue, uint32_t type, uint32_t size); 16 | bool addValuesToFrame(DbMap *map, DbAddr *queue, uint64_t *values, int count); 17 | bool addSlotToFrame(DbMap *map, DbAddr *queue, uint64_t value); 18 | bool initObjIdFrame(DbMap *map, DbAddr *free); 19 | void returnFreeFrame(DbMap *map, DbAddr free); 20 | 21 | 22 | 
-------------------------------------------------------------------------------- /db_handle.c: -------------------------------------------------------------------------------- 1 | #include "base64.h" 2 | #include "db.h" 3 | #include "db_handle.h" 4 | #include "db_api.h" 5 | #include "db_arena.h" 6 | #include "db_cursor.h" 7 | #include "db_map.h" 8 | #include "db_object.h" 9 | #include "db_redblack.h" 10 | 11 | extern char *hndlNames[]; 12 | 13 | // make handle from map pointer 14 | // leave it bound 15 | 16 | Handle *makeHandle(DbMap *map, uint32_t clntSize, uint32_t xtraSize, HandleType type) { 17 | ObjId hndlId[1]; 18 | Handle *handle; 19 | 20 | // get a new or recycled ObjId slot where the handle will live 21 | 22 | if (!(hndlId->bits = allocObjId(hndlMap))) 23 | return NULL; 24 | 25 | handle = fetchIdSlot(hndlMap, *hndlId); 26 | 27 | // initialize the new Handle 28 | // allocate in HndlMap 29 | 30 | // size of handle client area (e.g. Cursor/Iterator) 31 | 32 | clntSize += 15; 33 | clntSize &= -16; 34 | 35 | if (sizeof(Handle) + xtraSize > hndlMap->arena->objSize) { 36 | fprintf(stderr, 37 | "Error: makeHandle(%s): sizeof (Handle: %d) + (xtraSize: %d) .gt. 
" 38 | "(ObjSize: %d)\n", 39 | hndlNames[type], 40 | (int)sizeof(Handle), xtraSize, hndlMap->arenaDef->objSize); 41 | return NULL; 42 | } 43 | 44 | if ((handle->clntSize = clntSize)) 45 | handle->clientAddr.bits = allocBlk(hndlMap, clntSize, true); 46 | 47 | handle->entryTs = atomicAdd64(&map->arena->nxtTs, 1); 48 | handle->mapAddr = db_memAddr(map); 49 | handle->hndlId.bits = hndlId->bits; 50 | handle->hndlType = type; 51 | handle->bindCnt[0] = 1; 52 | 53 | return handle; 54 | } 55 | 56 | // assign Catalog docStore idx slot 57 | 58 | // delete handle resources 59 | 60 | // called by setter of the status KILL_BIT 61 | // after bindcnt goes to zero 62 | 63 | void destroyHandle(Handle *handle) { 64 | char maxType = atomicExchange8((uint8_t *)handle->maxType, 0); 65 | DbMap *dbMap = MapAddr(handle); 66 | ArenaDef *arenaDef = dbMap->arenaDef; 67 | uint32_t count; 68 | 69 | if (!maxType) return; 70 | if (handle->clntSize) freeBlk(hndlMap, handle->clientAddr); 71 | 72 | // specific handle cleanup 73 | 74 | switch (handle->hndlType) { 75 | case Hndl_cursor: 76 | dbCloseCursor(getObj(hndlMap, handle->clientAddr), dbMap); 77 | break; 78 | } 79 | 80 | // never return the handle Id slot 81 | // but return the memory 82 | 83 | freeBlk(hndlMap, handle->clientAddr); 84 | 85 | // zero the handle Id status 86 | 87 | if (~dbMap->drop[0] & KILL_BIT) return; 88 | 89 | lockLatch(arenaDef->hndlArray->latch); 90 | count = disableHndls(arenaDef->hndlArray); 91 | unlockLatch(arenaDef->hndlArray->latch); 92 | 93 | if (!count) 94 | if (!*dbMap->openCnt) closeMap(dbMap); 95 | } 96 | 97 | // enter api with a handle 98 | 99 | bool enterHandle(Handle *handle) { 100 | int cnt = atomicAdd32(handle->bindCnt, 1); 101 | DbMap *map = MapAddr(handle); 102 | 103 | // are we the first call after an idle period? 104 | // set the entryTs if so. 
105 | 106 | if (cnt == 1) handle->entryTs = atomicAdd64(&map->arena->nxtTs, 1); 107 | 108 | // exit if the handle is being closed 109 | 110 | if ((*handle->status & KILL_BIT)) { 111 | if (!atomicAdd32(handle->bindCnt, -1)) destroyHandle(handle); 112 | 113 | return false; 114 | } 115 | 116 | // is there a DROP request for this arena? 117 | 118 | if (map->drop[0] & KILL_BIT) { 119 | atomicOr8((volatile uint8_t *)handle->status, KILL_BIT); 120 | 121 | if (!atomicAdd32(handle->bindCnt, -1)) destroyHandle(handle); 122 | 123 | return false; 124 | } 125 | 126 | return true; 127 | } 128 | 129 | // bind handle for use in API call 130 | // return NULL if handle closed 131 | 132 | Handle *bindHandle(DbHandle dbHndl, HandleType hndlType) { 133 | Handle *handle = HandleAddr(dbHndl.hndlId); 134 | HandleType type = handle->hndlType; 135 | 136 | switch (hndlType) { 137 | case Hndl_anyIdx: 138 | if (type != Hndl_artIndex && type != Hndl_btree1Index && 139 | type != Hndl_btree2Index) 140 | return NULL; 141 | 142 | break; 143 | 144 | case Hndl_any: 145 | break; 146 | 147 | default: 148 | if (hndlType != type) 149 | return NULL; 150 | 151 | break; 152 | } 153 | 154 | // increment count of active binds 155 | // and capture timestamp if we are the 156 | // first handle bind 157 | 158 | if (enterHandle(handle)) return handle; 159 | 160 | return NULL; 161 | } 162 | 163 | // release handle binding 164 | 165 | void releaseHandle(Handle *handle) { 166 | if (!atomicAdd32(handle->bindCnt, -1)) { 167 | if ((*handle->status & KILL_BIT)) { 168 | destroyHandle(handle); 169 | } 170 | } 171 | } 172 | 173 | // disable all arena handles 174 | // by scanning HndlId arrayhndl 175 | // for dropped arenas 176 | 177 | // call with array DbAddr latched 178 | // return count of bound handles 179 | 180 | uint32_t disableHndls(DbAddr *array) { 181 | uint32_t count = 0; 182 | Handle *handle; 183 | ArrayHdr *hdr; 184 | int slot, seg; 185 | ObjId objId; 186 | 187 | if (array->addr) { 188 | hdr = getObj(hndlMap, 
*array); 189 | 190 | // process the level zero blocks in the array 191 | 192 | for (slot = 0; slot < hdr->maxLvl0; slot++) { 193 | uint64_t *inUse = getObj(hndlMap, hdr->addr[slot]); 194 | DbAddr *hndlAddr = (DbAddr *)inUse; 195 | 196 | for (seg = 0; seg < ARRAY_inuse; seg++) { 197 | uint64_t bits = inUse[seg]; 198 | int slotIdx = 0; 199 | 200 | // sluff unused slots in level zero block 201 | 202 | if (seg == 0) { 203 | slotIdx = ARRAY_first(sizeof(DbAddr)); 204 | bits >>= slotIdx; 205 | } 206 | 207 | do 208 | if (bits & 1) { 209 | objId.bits = hndlAddr[seg * 64 + slotIdx].bits; 210 | handle = fetchIdSlot(hndlMap, objId); 211 | 212 | atomicOr8((volatile uint8_t *)handle->status, KILL_BIT); 213 | count += *handle->bindCnt; 214 | } 215 | while (slotIdx++, bits /= 2); 216 | } 217 | } 218 | } 219 | 220 | return count; 221 | } 222 | 223 | // find arena's earliest bound handle 224 | // by scanning HndlId array 225 | 226 | uint64_t scanHandleTs(DbMap *map) { 227 | DbAddr *array = map->arenaDef->hndlArray; 228 | uint64_t lowTs = map->arena->nxtTs + 1; 229 | Handle *handle; 230 | ArrayHdr *hdr; 231 | int slot, seg; 232 | DbAddr addr; 233 | 234 | if (array->addr) { 235 | hdr = getObj(hndlMap, *array); 236 | 237 | // process all the level zero blocks in the array 238 | 239 | for (slot = 0; slot < hdr->maxLvl0; slot++) { 240 | uint64_t *inUse = getObj(hndlMap, hdr->addr[slot]); 241 | DbAddr *hndlAddr = (DbAddr *)inUse; 242 | 243 | for (seg = 0; seg < ARRAY_inuse; seg++) { 244 | uint64_t bits = inUse[seg]; 245 | int slotIdx = 0; 246 | 247 | if (seg == 0) { 248 | slotIdx = ARRAY_first(sizeof(DbAddr)); 249 | bits >>= slotIdx; 250 | } 251 | 252 | do 253 | if (bits & 1) { 254 | addr.bits = hndlAddr[seg * 64 + slotIdx].bits; 255 | handle = getObj(hndlMap, addr); 256 | 257 | if (!(*handle->status & KILL_BIT)) 258 | if (handle->bindCnt[0]) lowTs = handle->entryTs; 259 | } 260 | while (slotIdx++, bits /= 2); 261 | } 262 | } 263 | } 264 | 265 | return lowTs; 266 | } 267 | 
-------------------------------------------------------------------------------- /db_handle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Handle for an opened arena 4 | // these instances live in hndlMap (Catalog) 5 | 6 | // the map instances opened for a process live in memMap 7 | // in the objId segmented array 8 | // 9 | // ** marks fields that are 10 | // specific to the base arena 11 | 12 | typedef struct { 13 | union { 14 | DbHandle hndl[1]; 15 | ObjId hndlId; // Handle Id in HndlMap (Catalog) 16 | }; 17 | DbAddr mapAddr; // addr for this map in memMaps 18 | DbAddr clientAddr; // addr for client area in hndlMap 19 | uint64_t entryTs; // time stamp of first api call 20 | uint32_t hndlIdx; // catalog docstore index for this handle 21 | uint32_t clntSize; // size of client area (iterator/cursor) 22 | uint32_t xtraSize; // size of user work area after this Handle 23 | uint32_t bindCnt[1]; // count of open api calls (handle binds) 24 | uint16_t frameIdx; // arena avail/empty frames entry index assigned 25 | uint16_t arrayIdx; // arena open handle array index 26 | uint8_t maxType[1]; // number of arena free array frame list entries 27 | uint8_t status[1]; // current status of the handle 28 | uint8_t hndlType; // type of handle 29 | uint8_t relaxTs; 30 | TriadQueue queue[32]; // working blks transferred back to areena 31 | } Handle; 32 | 33 | // Handle status codes 34 | 35 | typedef enum { 36 | HndlIdle = 0, 37 | HndlKill = 1, 38 | } HndlCodes; 39 | 40 | // types of handles/arenas 41 | 42 | typedef enum { 43 | Hndl_any = 0, 44 | Hndl_anyIdx, 45 | Hndl_catalog, 46 | Hndl_database, 47 | Hndl_docStore, 48 | Hndl_artIndex, 49 | Hndl_btree1Index, 50 | Hndl_btree2Index, 51 | Hndl_colIndex, 52 | Hndl_iterator, 53 | Hndl_cursor, 54 | Hndl_txns, 55 | Hndl_max 56 | } HandleType; 57 | 58 | uint32_t disableHndls(DbAddr *hndlCalls); 59 | uint64_t scanHandleTs(DbMap *map); 60 | 61 | Handle *makeHandle(DbMap *map, 
uint32_t clntSize, uint32_t cursorSize, HandleType type); 62 | void releaseHandle(Handle *handle); 63 | bool enterHandle(Handle *handle); 64 | Handle *bindHandle(DbHandle dbHndl, HandleType type); 65 | 66 | void destroyHandle(Handle *handle); 67 | -------------------------------------------------------------------------------- /db_index.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // Index data structure after DbArena object 3 | 4 | 5 | // Btree1Index global data on disk after Arena 6 | // Global Index data structure after DbArena object 7 | 8 | typedef struct { 9 | uint64_t numKeys[1]; // number of keys in index 10 | DbAddr keySpec; 11 | bool delimFlds; // keys made with field values 12 | bool uniqueKeys; // keys made with field values 13 | } DbIndex; 14 | 15 | typedef struct { 16 | union { 17 | uint8_t *keyBuff; 18 | DbAddr bytes; 19 | }; 20 | ObjId docId[1]; 21 | uint16_t lastFld; 22 | uint16_t keyMax; 23 | uint16_t keyLen; // len of entire key 24 | uint16_t delimFlds; // use key fields with binary comparisons 25 | uint16_t suffixLen; // len of payload key at end 26 | uint8_t unique : 1; // index is unique 27 | uint8_t deferred : 1; // uniqueness deferred 28 | } DbKeyValue; 29 | 30 | -------------------------------------------------------------------------------- /db_iterator.c: -------------------------------------------------------------------------------- 1 | #include "base64.h" 2 | #include "db.h" 3 | 4 | #include "db_object.h" 5 | #include "db_handle.h" 6 | #include "db_arena.h" 7 | #include "db_map.h" 8 | #include "db_api.h" 9 | #include "db_iterator.h" 10 | 11 | // 12 | // increment a segmented ObjId 13 | // 14 | 15 | bool incrObjId(Iterator *it, DbMap *map) { 16 | uint64_t start = it->docId.bits; 17 | uint64_t mask, *tstNull, span; 18 | 19 | while (it->docId.seg <= map->arena->objSeg) { 20 | while (++it->docId.off <= map->arena->segs[it->docId.seg].maxId) { 21 | 22 | span = map->objSize; 23 | 24 
| tstNull = (uint64_t *)(map->base[it->docId.seg] + map->arena->segs[it->docId.seg].size - it->docId.off * span); 25 | 26 | while(span > 8 ) 27 | if(*tstNull++ ) 28 | return true; 29 | else 30 | span -= 8; 31 | 32 | mask = (256ULL << (span * 8)) - 1; 33 | 34 | if(*tstNull & mask ) 35 | return true; 36 | else 37 | continue; 38 | } 39 | 40 | it->docId.off = 0; 41 | it->docId.seg++; 42 | } 43 | 44 | it->docId.bits = start; 45 | return false; 46 | } 47 | 48 | // 49 | // decrement a segmented recordId 50 | // 51 | 52 | bool decrObjId(Iterator *it, DbMap *map) { 53 | uint64_t start = it->docId.bits; 54 | uint64_t mask, *tstNull, span; 55 | 56 | while (true) { 57 | if(it->docId.off) { 58 | span = map->objSize; 59 | 60 | tstNull = (uint64_t *)(map->base[it->docId.seg] + map->arena->segs[it->docId.seg].size - it->docId.off * span); 61 | 62 | while(span > 8 ) 63 | if(*tstNull++ ) 64 | return true; 65 | else 66 | span -= 8; 67 | 68 | mask = (256ULL << (span * 8)) - 1; 69 | 70 | if(*tstNull & mask ) 71 | return true; 72 | else 73 | continue; 74 | } 75 | 76 | if(*tstNull & mask ) 77 | return true; 78 | 79 | if (it->docId.seg) { 80 | it->docId.seg--; 81 | it->docId.off = map->arena->segs[it->docId.seg].maxId + 1; 82 | continue; 83 | } 84 | 85 | it->docId.bits = start; 86 | return false; 87 | } 88 | } 89 | 90 | // advance/reverse iterator 91 | 92 | DbStatus iteratorMove(DbHandle hndl, IteratorOp op, DocId *docId) { 93 | DbStatus stat = DB_OK; 94 | Handle *docHndl; 95 | Iterator *it; 96 | DbMap *docMap; 97 | 98 | if ((docHndl = bindHandle(hndl, Hndl_docStore))) 99 | docMap = MapAddr(docHndl); 100 | else 101 | return DB_ERROR_handleclosed; 102 | 103 | it = getObj(hndlMap, docHndl->clientAddr); 104 | 105 | switch (op) { 106 | case IterNext: 107 | if (incrObjId(it, docMap)) { 108 | it->state = IterPosAt; 109 | docId->bits = it->docId.bits; 110 | } else { 111 | it->state = IterRightEof; 112 | stat = DB_ITERATOR_eof; 113 | } 114 | 115 | break; 116 | 117 | case IterPrev: 118 | if 
(decrObjId(it, docMap)) { 119 | it->state = IterPosAt; 120 | docId->bits = it->docId.bits; 121 | } else { 122 | it->state = IterLeftEof; 123 | stat = DB_ITERATOR_eof; 124 | } 125 | 126 | break; 127 | 128 | case IterBegin: 129 | it->docId.bits = 0; 130 | docId->bits = it->docId.bits; 131 | it->state = IterLeftEof; 132 | stat = DB_ITERATOR_eof; 133 | break; 134 | 135 | 136 | case IterEnd: 137 | it->docId.seg = docMap->arena->objSeg; 138 | it->docId.off = docMap->arena->segs[it->docId.seg].maxId; 139 | 140 | docId->bits = it->docId.bits; 141 | it->state = IterRightEof; 142 | stat = DB_ITERATOR_eof; 143 | break; 144 | 145 | case IterSeek: 146 | if((it->docId.bits = docId->bits)) 147 | it->state = IterPosAt; 148 | else 149 | stat = DB_ITERATOR_notfound; 150 | 151 | break; 152 | } 153 | 154 | docId->bits = it->docId.bits; 155 | releaseHandle(docHndl); 156 | return stat; 157 | } 158 | 159 | // 160 | // advance iterator forward 161 | // 162 | 163 | DbDoc *iteratorNext(DbHandle hndl) { 164 | Handle *docHndl; 165 | DbAddr *slot; 166 | DbMap *docMap; 167 | Iterator *it; 168 | DbDoc *dbDoc = NULL; 169 | 170 | if ((docHndl = bindHandle(hndl, Hndl_docStore))) 171 | docMap = MapAddr(docHndl); 172 | else 173 | return NULL; 174 | 175 | it = getObj(hndlMap, docHndl->clientAddr); 176 | 177 | if (incrObjId(it, docMap)) { 178 | slot = fetchIdSlot(docMap, it->docId); 179 | it->state = IterPosAt; 180 | dbDoc = getObj(docMap, *slot); 181 | } else 182 | it->state = IterRightEof; 183 | 184 | releaseHandle(docHndl); 185 | return NULL; 186 | } 187 | 188 | // 189 | // advance iterator backward 190 | // 191 | 192 | DbDoc *iteratorPrev(DbHandle hndl) { 193 | Handle *docHndl; 194 | DbAddr *slot; 195 | DbMap *docMap; 196 | Iterator *it; 197 | DbDoc *dbDoc = NULL; 198 | 199 | if ((docHndl = bindHandle(hndl, Hndl_docStore))) 200 | docMap = MapAddr(docHndl); 201 | else 202 | return NULL; 203 | 204 | it = getObj(hndlMap, docHndl->clientAddr); 205 | 206 | if (decrObjId(it, docMap)) { 207 | slot = 
fetchIdSlot(docMap, it->docId); 208 | dbDoc = getObj(docMap, *slot); 209 | it->state = IterPosAt; 210 | } else 211 | it->state = IterLeftEof; 212 | 213 | return dbDoc; 214 | } 215 | 216 | // 217 | // set iterator to specific objectId 218 | // 219 | 220 | DbDoc *iteratorFetch(DbHandle hndl, ObjId docId) { 221 | Handle *docHndl; 222 | DbAddr *slot; 223 | DbMap *docMap; 224 | DbDoc *dbDoc; 225 | 226 | if ((docHndl = bindHandle(hndl, Hndl_docStore))) 227 | docMap = MapAddr(docHndl); 228 | else 229 | return NULL; 230 | 231 | slot = fetchIdSlot(docMap, docId); 232 | dbDoc = getObj(docMap, *slot); 233 | releaseHandle(docHndl); 234 | return dbDoc; 235 | } 236 | 237 | // 238 | // set iterator to specific objectId 239 | // 240 | 241 | DbDoc *iteratorSeek(DbHandle hndl, ObjId docId) { 242 | Handle *docHndl; 243 | DbAddr *slot; 244 | DbMap *docMap; 245 | Iterator *it; 246 | DbDoc *dbDoc; 247 | 248 | if ((docHndl = bindHandle(hndl, Hndl_docStore))) 249 | docMap = MapAddr(docHndl); 250 | else 251 | return NULL; 252 | 253 | it = getObj(hndlMap, docHndl->clientAddr); 254 | it->docId.bits = docId.bits; 255 | 256 | slot = fetchIdSlot(docMap, docId); 257 | dbDoc = getObj(docMap, *slot); 258 | releaseHandle(docHndl); 259 | return dbDoc; 260 | } 261 | -------------------------------------------------------------------------------- /db_iterator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // iterator object 4 | // created in handle client area 5 | 6 | typedef enum { IterNone, IterLeftEof, IterRightEof, IterPosAt } IterState; 7 | 8 | typedef struct { 9 | ObjId docId; // current ObjID 10 | IterState state; 11 | } Iterator; 12 | 13 | // Iterator operations 14 | 15 | typedef enum { 16 | IterNext = 'n', 17 | IterPrev = 'p', 18 | IterBegin = 'b', 19 | IterEnd = 'e', 20 | IterSeek = 's', 21 | IterFetch = 'f' 22 | } IteratorOp; 23 | 24 | DbStatus iteratorMove(DbHandle hndl, IteratorOp op, DocId *docId); 25 | 26 | DbDoc 
*iteratorFetch(DbHandle hndl, ObjId docId); 27 | DbDoc *iteratorSeek(DbHandle hndl, ObjId docId); 28 | DbDoc *iteratorNext(DbHandle hndl); 29 | DbDoc *iteratorPrev(DbHandle hndl); 30 | 31 | DbStatus createIterator(DbHandle *hndl, DbHandle docHndl, Params *params); 32 | 33 | 34 | -------------------------------------------------------------------------------- /db_malloc.c: -------------------------------------------------------------------------------- 1 | // db_malloc.c 2 | 3 | #include "base64.h" 4 | #include "db.h" 5 | 6 | #include "db_arena.h" 7 | #include "db_object.h" 8 | #include "db_handle.h" 9 | #include "db_cursor.h" 10 | #include "db_map.h" 11 | #include "db_malloc.h" 12 | 13 | #ifndef __APPLE__ 14 | #include 15 | #endif 16 | 17 | bool mallocDebug; 18 | 19 | // 20 | // Raw object wrapper 21 | // 22 | 23 | typedef struct { 24 | DbAddr addr; 25 | uint32_t size; 26 | } dbobj_t; 27 | 28 | DbArena memArena[1]; 29 | DbMap memMap[1]; 30 | 31 | void memInit(void) { 32 | ArenaDef arenaDef[1]; 33 | 34 | memMap->arena = memArena; 35 | memMap->db = memMap; 36 | 37 | #ifdef _WIN32 38 | memMap->hndl = INVALID_HANDLE_VALUE; 39 | #else 40 | memMap->hndl = -1; 41 | #endif 42 | 43 | // set up memory arena and handle addr ObjId 44 | 45 | memset(arenaDef, 0, sizeof(arenaDef)); 46 | arenaDef->objSize = sizeof(DbAddr); 47 | 48 | initArena(memMap, arenaDef, "malloc", 6, NULL); 49 | } 50 | 51 | uint32_t db_memSize(void *obj) { 52 | dbobj_t *raw = obj; 53 | 54 | return raw[-1].size; 55 | } 56 | 57 | DbAddr db_memAddr(void *obj) { 58 | dbobj_t *raw = obj; 59 | 60 | return raw[-1].addr; 61 | } 62 | 63 | void *db_memObj(DbAddr addr) { 64 | return (uint8_t *)getObj(memMap, addr) + sizeof(dbobj_t); 65 | } 66 | 67 | void db_memFree(DbAddr addr) { 68 | freeBlk(memMap, addr); 69 | } 70 | 71 | void db_free(void *obj) { 72 | dbobj_t *raw = obj; 73 | DbAddr addr = raw[-1].addr; 74 | 75 | if (mallocDebug) { 76 | raw[-1].addr.bits = 0xdeadbeef; 77 | 78 | if (addr.bits == 0xdeadbeef) { 79 | 
fprintf(stderr, "db_free: duplicate free!\n"); 80 | exit(0); 81 | } 82 | } 83 | 84 | freeBlk(memMap, addr); 85 | } 86 | 87 | uint32_t db_size(void *obj) { 88 | dbobj_t *raw = obj; 89 | 90 | return raw[-1].size; 91 | } 92 | 93 | // allocate object 94 | 95 | void *db_malloc(uint32_t len, bool zeroit) { 96 | dbobj_t *mem; 97 | DbAddr addr; 98 | 99 | addr.bits = db_rawAlloc(len + sizeof(dbobj_t), zeroit); 100 | mem = getObj(memMap, addr); 101 | mem->addr.bits = addr.bits; 102 | mem->size = len; 103 | return mem + 1; 104 | } 105 | 106 | // raw memory allocator 107 | 108 | uint64_t db_rawAlloc(uint32_t amt, bool zeroit) { 109 | uint64_t bits; 110 | 111 | if ((bits = allocBlk(memMap, amt, zeroit))) return bits; 112 | 113 | fprintf(stderr, "db_rawAlloc: out of memory!\n"); 114 | exit(1); 115 | } 116 | 117 | uint32_t db_rawSize(DbAddr addr) { 118 | int bits = addr.type / 2; 119 | uint32_t size = 1 << bits; 120 | 121 | // implement half-bit sizing 122 | // if type is even. 123 | 124 | if (~addr.type & 1) size -= size / 4; 125 | 126 | return size; 127 | } 128 | 129 | void *db_rawObj(DbAddr addr) { return getObj(memMap, addr); } 130 | -------------------------------------------------------------------------------- /db_malloc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base64.h" 3 | #include "db.h" 4 | 5 | /** 6 | * memory allocation 7 | */ 8 | 9 | uint64_t db_rawAlloc(uint32_t amt, bool zero); 10 | uint32_t db_rawSize(DbAddr addr); 11 | void *db_rawObj(DbAddr addr); 12 | 13 | void *db_malloc(uint32_t amt, bool zero); 14 | void db_free(void *obj); 15 | uint32_t db_size(void *obj); 16 | DbAddr db_memAddr(void *obj); 17 | uint32_t db_memSize(void *mem); 18 | void *db_memObj(DbAddr addr); 19 | void db_memFree(DbAddr addr); 20 | -------------------------------------------------------------------------------- /db_map.h: -------------------------------------------------------------------------------- 1 | #pragma 
once 2 | 3 | /** 4 | * map allocations 5 | */ 6 | 7 | uint64_t allocMap(DbMap *map, uint32_t size); 8 | uint64_t allocBlk(DbMap *map, uint32_t size, bool zeroit); 9 | uint64_t allocObj(DbMap* map, TriadQueue *queue, int type, uint32_t size, bool zeroit ); 10 | uint64_t allocObjId(DbMap *map); 11 | 12 | void *fetchIdSlot (DbMap *map, ObjId objId); 13 | void *getObj(DbMap *map, DbAddr addr); 14 | void freeBlk(DbMap *map, DbAddr addr); 15 | void freeId(DbMap *map, ObjId objId); 16 | 17 | uint64_t getFreeFrame(DbMap *map); 18 | uint64_t allocFrame(DbMap *map); 19 | 20 | /** 21 | * spin latches 22 | */ 23 | 24 | void lockAddr(volatile uint64_t* bits); 25 | void unlockAddr(volatile uint64_t* bits); 26 | void lockLatchGrp(volatile uint8_t *latch, uint8_t bitNo); 27 | void unlockLatchGrp(volatile uint8_t *latch, uint8_t bitNo); 28 | void waitNonZero(volatile uint8_t *zero); 29 | void waitNonZero32(volatile uint32_t *zero); 30 | void waitNonZero64(volatile uint64_t *zero); 31 | void waitZero(volatile uint8_t *zero); 32 | void waitZero32(volatile uint32_t *zero); 33 | void waitZero64(volatile uint64_t *zero); 34 | void art_yield(void); 35 | 36 | #define lockLatch(latch) lockLatchGrp(latch, 0) 37 | #define unlockLatch(latch) unlockLatchGrp(latch, 0) 38 | 39 | /** 40 | * atomic integer ops 41 | */ 42 | 43 | void kill_slot(volatile uint8_t* latch); 44 | 45 | bool atomicCAS8(volatile uint8_t *dest, uint8_t comp, uint8_t newValue); 46 | bool atomicCAS16(volatile uint16_t *dest, uint16_t comp, uint16_t newValue); 47 | bool atomicCAS32(volatile uint32_t *dest, uint32_t comp, uint32_t newValue); 48 | bool atomicCAS64(volatile uint64_t *dest, uint64_t comp, uint64_t newValue); 49 | 50 | uint64_t atomicAdd64(volatile uint64_t *value, int64_t amt); 51 | uint32_t atomicAdd32(volatile uint32_t *value, int32_t amt); 52 | uint16_t atomicAdd16(volatile uint16_t *value, int16_t amt); 53 | uint64_t atomicOr64(volatile uint64_t *value, uint64_t amt); 54 | uint32_t atomicOr32(volatile uint32_t 
*value, uint32_t amt); 55 | uint64_t atomicExchange(volatile uint64_t *target, uint64_t value); 56 | uint64_t compareAndSwap(volatile uint64_t* target, uint64_t compare_val, uint64_t swap_val); 57 | uint8_t atomicExchange8(volatile uint8_t *target, uint8_t value); 58 | uint8_t atomicAnd8(volatile uint8_t *value, uint8_t mask); 59 | uint8_t atomicOr8(volatile uint8_t *value, uint8_t mask); 60 | 61 | int readSegZero(DbMap *map, DbArena *segZero); 62 | 63 | void closeMap(DbMap *map); 64 | void deleteMap(char *path); 65 | bool fileExists(char *path); 66 | void yield(void); 67 | 68 | #ifdef _WIN32 69 | void lockArena (HANDLE hndl, char *path); 70 | void unlockArena (HANDLE hndl, char *path); 71 | #else 72 | void lockArena (int hndl, char *path); 73 | void unlockArena (int hndl, char *path); 74 | #endif 75 | 76 | int64_t db_getEpoch(void); 77 | long mynrand48(unsigned short xseed[3]); 78 | -------------------------------------------------------------------------------- /db_object.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef _WIN32 4 | #include 5 | #endif 6 | 7 | // number of elements in an array node 8 | // max element idx = ARRAY_size * ARRAY_lvl1 9 | 10 | // note that each level zero block reserves first few 11 | // indexes (ARRAY_first) for the inUse bit map 12 | 13 | #define ARRAY_size 256 // level zero slot count 14 | #define ARRAY_lvl1 (256 - 2) // adjust to power of two sizeround 15 | 16 | // define the number of inUse slots per level zero block 17 | 18 | #define ARRAY_inuse ((ARRAY_size + 64 - 1) / 64) 19 | 20 | // calculate the number of sluffed indexes because of inUse bit maps 21 | 22 | #define ARRAY_first(objsize) ((objsize) ? 
(ARRAY_inuse * sizeof(uint64_t) + (objsize) - 1) / (objsize) : 0) 23 | 24 | // Arrays 25 | 26 | typedef struct { 27 | uint16_t level0; // level0 slot to allocate 28 | uint16_t maxLvl0; // number of level one blocks 29 | uint32_t objSize; // size of each array element 30 | DbAddr addr[ARRAY_lvl1]; // level one block addresses 31 | } ArrayHdr; 32 | 33 | void *arrayElement(DbMap *map, DbAddr *array, uint16_t idx, uint32_t size); 34 | void *arrayEntry(DbMap *map, DbAddr *array, uint16_t idx); 35 | 36 | uint16_t arrayAlloc(DbMap *map, DbAddr *array, uint32_t size); 37 | uint16_t arrayFirst(uint32_t objSize); 38 | 39 | void arrayRelease(DbMap *map, DbAddr *array, uint16_t idx); 40 | 41 | enum ObjType { 42 | FrameType, 43 | ObjIdType, // ObjId value 44 | MinObjType = 4, // minimum object size in bits 45 | MaxObjType = 50 // each half power of two, 4 - 24 46 | }; 47 | 48 | // set-membership control 49 | 50 | typedef struct { 51 | DbAddr next; 52 | uint16_t cnt; // count of entries in this table 53 | uint16_t max; // number of hash table entries 54 | uint16_t sizeIdx; // hash table size vector slot 55 | uint64_t table[0]; // the hash table entries 56 | } DbMmbr; 57 | 58 | DbMmbr *xtnMmbr(DbMap *map, DbAddr *addr, DbMmbr *first); 59 | DbMmbr *iniMmbr(DbMap *map, DbAddr *addr, int minSize); 60 | 61 | // mmbr table enumerators 62 | 63 | void *getMmbr(DbMmbr *mmbr, uint64_t item); 64 | void *nxtMmbr(DbMmbr *mmbr, uint64_t *entry); 65 | void *allMmbr(DbMmbr *mmbr, uint64_t *entry); 66 | void *revMmbr(DbMmbr *mmbr, uint64_t *entry); 67 | 68 | // mmbr-set functions 69 | 70 | uint64_t *setMmbr(DbMap *map, DbAddr *addr, uint64_t keyVal, bool add); 71 | uint64_t *newMmbr(DbMap *map, DbAddr *addr, uint64_t keyVal); 72 | -------------------------------------------------------------------------------- /db_params.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __MACH__ 3 | #include 4 | #include 5 | #endif 6 | #include "base64.h" 7 | 
#include "db.h" 8 | #include "db_object.h" 9 | #include "db_redblack.h" 10 | #include "db_arena.h" 11 | #include "db_map.h" 12 | #include "db_api.h" 13 | 14 | // if this is a new map file, copy param 15 | // structure to a new ArenaDef in parent 16 | // otherwise, return existing arenaDef 17 | // from the parent. 18 | 19 | RedBlack *procParam(DbMap *parent, char *name, int nameLen, Params *params) { 20 | PathStk pathStk[1]; 21 | ArenaDef *arenaDef; 22 | RedBlack *rbEntry; 23 | DbAddr *slot; 24 | 25 | // see if ArenaDef already exists as a child in the parent 26 | 27 | while (true) { 28 | lockLatch (parent->arenaDef->nameTree->latch); 29 | 30 | if ((rbEntry = rbFind(parent->db, parent->arenaDef->nameTree, name, nameLen, pathStk))) { 31 | arenaDef = (ArenaDef *)(rbEntry + 1); 32 | 33 | if (*arenaDef->dead & KILL_BIT) { 34 | unlockLatch (parent->arenaDef->nameTree->latch); 35 | yield (); 36 | continue; 37 | } 38 | 39 | unlockLatch (parent->arenaDef->nameTree->latch); 40 | return rbEntry; 41 | } 42 | 43 | break; 44 | } 45 | 46 | // create new rbEntry in parent 47 | // with an arenaDef payload 48 | 49 | if ((rbEntry = rbNew(parent->db, name, nameLen, sizeof(ArenaDef)))) { 50 | arenaDef = (ArenaDef *)(rbEntry + 1); 51 | } else { 52 | unlockLatch(parent->arenaDef->nameTree->latch); 53 | return NULL; 54 | } 55 | 56 | // fill in new arenaDef r/b entry 57 | 58 | arenaDef->creation = db_getEpoch(); 59 | 60 | memcpy (arenaDef->params, params, sizeof(arenaDef->params)); 61 | 62 | // allocate slot in parent's openMap array 63 | 64 | arenaDef->id = arrayAlloc(parent->db, parent->arenaDef->childList, sizeof(DbAddr)); 65 | slot = arrayEntry(parent->db, parent->arenaDef->childList, (uint16_t)arenaDef->id); 66 | slot->bits = rbEntry->addr.bits; 67 | 68 | // add arenaDef to parent's child arenaDef by name tree 69 | 70 | rbAdd(parent->db, parent->arenaDef->nameTree, rbEntry, pathStk); 71 | unlockLatch(parent->arenaDef->nameTree->latch); 72 | return rbEntry; 73 | } 74 | 75 | // get 
millisecond precision system timestamp epoch 76 | 77 | int64_t db_getEpoch(void) { 78 | #ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time 79 | clock_serv_t cclock[1]; 80 | mach_timespec_t mts[1]; 81 | host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, cclock); 82 | clock_get_time(*cclock, mts); 83 | mach_port_deallocate(mach_task_self(), *cclock); 84 | return mts->tv_sec * 1000ULL + mts->tv_nsec; 85 | #elif !defined(_WIN32) 86 | struct timespec ts[1]; 87 | clock_gettime(_XOPEN_REALTIME, ts); 88 | return ts->tv_sec * 1000ULL + ts->tv_nsec / 1000000ULL; 89 | #else 90 | int64_t wintime[1]; 91 | GetSystemTimeAsFileTime((FILETIME*)wintime); 92 | *wintime /= 10000ULL; 93 | *wintime -= 11644473600000i64; //1jan1601 to 1jan1970 94 | return *wintime; 95 | #endif 96 | } 97 | 98 | -------------------------------------------------------------------------------- /db_redblack.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "base64.h" 4 | #include "db.h" 5 | 6 | // red-black tree descent stack 7 | 8 | #define RB_bits 24 9 | 10 | typedef struct { 11 | uint32_t lvl; // height of the stack 12 | DbAddr entry[RB_bits]; // stacked tree nodes 13 | } PathStk; 14 | 15 | typedef struct { 16 | DbAddr left, right, addr; // next nodes down, entry addr 17 | uint32_t payLoad; // length of payload following 18 | uint16_t keyLen; // length of key after payload 19 | uint8_t red; // is tree node red? 
20 | } RedBlack; 21 | 22 | #define rbkey(entry) ((char *)(entry + 1) + entry->payLoad) 23 | 24 | RedBlack *rbFind(DbMap *parent, DbAddr *childNames, char *name, uint32_t nameLen, PathStk *path); 25 | RedBlack *rbNew (DbMap *map, char *key, uint32_t keyLen, uint32_t payload); 26 | RedBlack *rbStart(DbMap *map, PathStk *path, DbAddr *root); 27 | RedBlack *rbNext(DbMap *map, PathStk *path); 28 | 29 | void rbAdd(DbMap *map, DbAddr *root, RedBlack *entry, PathStk *path); 30 | bool rbDel (DbMap *map, DbAddr *root, RedBlack *entry); 31 | void rbKill (DbMap *map, DbAddr root); 32 | void rbLeftRotate (DbMap *map, DbAddr *root, DbAddr slot, RedBlack *parent, int cmp); 33 | void rbLeftRotate (DbMap *map, DbAddr *root, DbAddr slot, RedBlack *parent, int cmp); 34 | void rbAdd (DbMap *map, DbAddr *root, RedBlack *entry, PathStk *path); 35 | bool rbDel (DbMap *map, DbAddr *root, RedBlack *entry);RedBlack *rbNew (DbMap *map, char *key, uint32_t keyLen, uint32_t payLoad); 36 | RedBlack *rbStart(DbMap *map, PathStk *path, DbAddr *root); 37 | RedBlack *rbNext(DbMap *map, PathStk *path); 38 | void rbKill (DbMap *map, DbAddr slot); 39 | 40 | 41 | -------------------------------------------------------------------------------- /db_skiplist.c: -------------------------------------------------------------------------------- 1 | // skip list implementation 2 | 3 | #include "base64.h" 4 | #include "db.h" 5 | #include "db_arena.h" 6 | #include "db_map.h" 7 | #include "db_skiplist.h" 8 | 9 | // initialize initial skip list node 10 | 11 | uint64_t skipInit(DbMap *map, int numEntries) { 12 | if (numEntries > 127) 13 | numEntries = 127; 14 | 15 | return allocBlk(map, numEntries * sizeof(SkipEntry) + sizeof(SkipNode), true); 16 | } 17 | 18 | // find key value in skiplist, return entry address 19 | 20 | SkipEntry *skipFind(DbMap *map, DbAddr *skip, uint64_t key) { 21 | DbAddr *next = skip; 22 | SkipNode *skipNode; 23 | SkipEntry *entry; 24 | 25 | while (next->addr) { 26 | skipNode = getObj(map, 
*next); 27 | 28 | if (*skipNode->array[next->nslot-1].key >= key) { 29 | entry = skipNode->array + skipSearch(skipNode->array, next->nslot-1, key); 30 | 31 | if (*entry->key == key) 32 | return entry; 33 | 34 | return NULL; 35 | } 36 | 37 | next = skipNode->next; 38 | } 39 | 40 | return NULL; 41 | } 42 | 43 | // remove key from skip list 44 | // returning value from slot 45 | 46 | uint64_t skipDel(DbMap *map, DbAddr *skip, uint64_t key) { 47 | SkipNode *skipNode = NULL, *prevNode; 48 | DbAddr *next = skip; 49 | SkipEntry *entry; 50 | uint64_t val; 51 | 52 | while (next->addr) { 53 | prevNode = skipNode; 54 | skipNode = getObj(map, *next); 55 | 56 | if (*skipNode->array[next->nslot-1].key >= key) { 57 | entry = skipNode->array + skipSearch(skipNode->array, next->nslot-1, key); 58 | 59 | if (*entry->key == key) 60 | val = *entry->val; 61 | else 62 | return 0; 63 | 64 | // remove the entry slot 65 | 66 | if (--next->nslot) { 67 | while (entry - skipNode->array < next->nslot) { 68 | entry[0] = entry[1]; 69 | entry++; 70 | } 71 | 72 | return val; 73 | } 74 | 75 | // skip list node is empty, remove it 76 | 77 | if (prevNode) 78 | prevNode->next->bits = skipNode->next->bits; 79 | else 80 | skip->bits = skipNode->next->bits; 81 | 82 | freeBlk(map, *next); 83 | return val; 84 | } 85 | 86 | next = skipNode->next; 87 | } 88 | 89 | return 0; 90 | } 91 | 92 | // Push new maximal key onto head of skip list 93 | // return the value slot address 94 | 95 | SkipEntry *skipPush(DbMap *map, DbAddr *skip, uint64_t key) { 96 | SkipNode *skipNode; 97 | SkipEntry *entry; 98 | uint64_t next; 99 | 100 | if (!skip->addr || skip->nslot == skipSize(skip)) { 101 | next = skip->bits; 102 | 103 | skip->bits = allocBlk(map, SKIP_node * sizeof(SkipEntry) + sizeof(SkipNode), true) | ADDR_MUTEX_SET; 104 | skipNode = getObj(map, *skip); 105 | skipNode->next->bits = next; 106 | } 107 | else 108 | skipNode = getObj(map, *skip); 109 | 110 | entry = skipNode->array + skip->nslot++; 111 | *entry->key = 
key; 112 | return entry; 113 | } 114 | 115 | // add item to skiplist 116 | 117 | DbStatus addItemToSkiplist(DbMap *map, DbAddr *skip, uint64_t key, uint64_t item) { 118 | SkipEntry *entry; 119 | 120 | lockLatch(skip->latch); 121 | entry = skipAdd(map, skip, key); 122 | *entry->val = item; 123 | unlockLatch(skip->latch); 124 | return DB_OK; 125 | } 126 | 127 | // Add arbitrary key to skip list 128 | // call with skip hdr locked 129 | // return entry address 130 | 131 | SkipEntry *skipAdd(DbMap *map, DbAddr *skip, uint64_t key) { 132 | SkipNode *skipNode = NULL, *nextNode; 133 | DbAddr *next = skip; 134 | uint64_t prevBits; 135 | SkipEntry *entry; 136 | int min, max; 137 | 138 | while (next->addr) { 139 | skipNode = getObj(map, *next); 140 | 141 | // find skiplist node that covers key 142 | 143 | if (*skipNode->array[next->nslot-1].key < key) 144 | if (skipNode->next->bits) { 145 | next = skipNode->next; 146 | continue; 147 | } 148 | 149 | // we belong in this skipNode 150 | // find the first entry .ge. to the new key 151 | 152 | min = skipSearch(skipNode->array, next->nslot, key); 153 | entry = skipNode->array + min; 154 | 155 | // does key already exist? 
156 | 157 | if (min < next->nslot && *entry->key == key) 158 | return entry; 159 | 160 | // split node if already full 161 | 162 | if (next->nslot == skipSize(next)) { 163 | prevBits = skipNode->next->bits; 164 | skipNode->next->bits = allocBlk(map, SKIP_node * sizeof(SkipEntry) + sizeof(SkipNode), true); 165 | 166 | nextNode = getObj(map, *skipNode->next); 167 | nextNode->next->bits = prevBits; 168 | memcpy(nextNode->array, skipNode->array + skipSize(skipNode->next) / 2, sizeof(skipNode->next) * (skipSize(skipNode->next) - skipSize(skipNode->next) / 2)); 169 | 170 | skipNode->next->nslot = skipSize(skipNode->next) - skipSize(skipNode->next) / 2; 171 | next->nslot = skipSize(next) / 2; 172 | continue; 173 | } 174 | 175 | // insert new entry slot at min 176 | 177 | max = next->nslot++; 178 | 179 | while (--max > min) 180 | skipNode->array[max + 1] = skipNode->array[max]; 181 | 182 | // fill in key and return value slot 183 | 184 | *skipNode->array[min].key = key; 185 | return skipNode->array + min; 186 | } 187 | 188 | // initialize empty list 189 | 190 | skip->bits = allocBlk(map, SKIP_node * sizeof(SkipEntry) + sizeof(SkipNode), true) | ADDR_MUTEX_SET; 191 | skipNode = getObj(map, *skip); 192 | 193 | *skipNode->array->key = key; 194 | skip->nslot = 1; 195 | 196 | return skipNode->array; 197 | } 198 | 199 | // search Skip node for idx of key value slot 200 | // e.g. 
key value is <= entry[idx] 201 | 202 | int skipSearch(SkipEntry *array, int high, uint64_t key) { 203 | int low = 0; 204 | 205 | // invariants: 206 | // key <= entry[high] 207 | // key > entry[low-1] 208 | 209 | while (high > low) { 210 | int diff = (high - 1 - low) / 2; 211 | 212 | if (key <= *array[low + diff].key) 213 | high = low + diff; 214 | else 215 | low += diff + 1; 216 | } 217 | 218 | return high; 219 | } 220 | 221 | -------------------------------------------------------------------------------- /db_skiplist.h: -------------------------------------------------------------------------------- 1 | #include "rwlock/readerwriter.h" 2 | 3 | /** 4 | * skip lists 5 | */ 6 | 7 | // skip list head 8 | 9 | struct SkipHead_ { 10 | DbAddr head[1]; // list head 11 | RWLock lock[1]; // reader/writer lock 12 | }; 13 | 14 | // Skip list entry 15 | 16 | typedef struct SkipEntry_ { 17 | uint64_t key[1]; // entry key 18 | uint64_t val[1]; // entry value 19 | } SkipEntry; 20 | 21 | // size of skip list entry array 22 | 23 | typedef struct { 24 | DbAddr next[1]; // next block of keys 25 | SkipEntry array[0]; // array of key/value pairs 26 | } SkipNode; 27 | 28 | int skipSearch(SkipEntry *array, int high, uint64_t key); 29 | SkipEntry *skipFind(DbMap *map, DbAddr *skip, uint64_t key); 30 | SkipEntry *skipPush(DbMap *map, DbAddr *skip, uint64_t key); 31 | SkipEntry *skipAdd(DbMap *map, DbAddr *skip, uint64_t key); 32 | uint64_t skipInit(DbMap *map, int numEntries); 33 | uint64_t skipDel(DbMap *map, DbAddr *skip, uint64_t key); 34 | DbStatus addItemToSkiplist(DbMap *map, DbAddr *skip, uint64_t key, uint64_t item); 35 | 36 | -------------------------------------------------------------------------------- /implementation guide: -------------------------------------------------------------------------------- 1 | The primary data structure is the memory mapped arena which consists of a DocID vector which grow downwards from the end of the arena, and documents which are allocated 
space starting from the bottom of the arena. The DbAddr objects which identify these arena allocations are 64 bits. The largest possible arena size is 64 GB, with up to 1000 arenas per collection. Using the DbAddr means that multi-process and multi-threaded programs are supported with differing base addresses. 2 | 3 | The database data structures support the DocID, which is uniquely assigned to each document out of a segmented linear array created for each collection. Each collection supports an arbitrary number of Iterator and Cursor objects, which return de-duplicated sets of DocID and collection DbAddr. 4 | 5 | 1. Access to a document version (or rowID or tuple) is via a DocID presented to the storage layer. The version returned to the caller depends on the timestamp (TS) of the request: the most recent version whose TS is less than the requestor's TS is chosen. 6 | 7 | 2. An iterator can be opened over the DocID segmented linear index, which will return all of the current document versions subject to TS selection. The iterator supports first or last DocID, next or prev DocID, and move-to DocID. 8 | 9 | 3. An arbitrary number of keyed indexes can be added, or not, to each document version as part of document creation or updating. There are currently 2 types: ART and btree. Cursors can be used to look up individual documents by index key, or to traverse the DocIDs according to cursor ordering. 10 | 11 | 4. Javascript tools can be written to administer and debug the various facets that SQLite programs are invoking and running into problems with. The Javascript engine natively supports documents, collections and indexes, and network connections with JSON/BSON encoding. 12 | 13 | 5. The backend API for SQLite, Javascript or MongoDB is based on Handles returned and managed by db_handle.c, db_api.c and mvcc_dbapi.c 14 | for document stores, collections, indexes.
A demonstration system, with appropriate glue in a db_sqlite.c 15 | d 16 | -------------------------------------------------------------------------------- /mvcc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "base64.h" 4 | #include "db.h" 5 | #include "db_api.h" 6 | #include "db_index.h" 7 | #include "Hi-Performance-Timestamps/timestamps.h" 8 | 9 | // MVCC and TXN definitions for DATABASE project 10 | 11 | typedef enum { 12 | TxnDone, // fully committed 13 | TxnGrowing, // reading and upserting 14 | TxnCommitting, // committing 15 | TxnCommitted, // committed 16 | TxnRollback // roll back 17 | } TxnState; 18 | 19 | typedef enum { 20 | TxnRaw = 0, // txn step raw read/write 21 | TxnMap, // txn step is a Catalog handle idx 22 | TxnRdr, // txn step is a docId & version 23 | TxnWrt, // txn step is write of version 24 | TxnVer // txn step is verNo 25 | } TxnStep; 26 | 27 | typedef enum { 28 | TxnNotSpecified, 29 | TxnSnapShot, 30 | TxnReadCommitted, 31 | TxnSerializable 32 | } TxnCC; 33 | 34 | typedef enum { 35 | objNone, 36 | objDoc, 37 | objVer, 38 | objHndl, 39 | objTxn, 40 | objString, 41 | objRec, 42 | objErr, 43 | } MVCCType; 44 | 45 | typedef struct { 46 | union { 47 | void *object; 48 | uint8_t *buff; 49 | uint64_t bits; 50 | }; 51 | uint64_t value; 52 | uint32_t count; 53 | uint32_t size; 54 | MVCCType objType; 55 | DbStatus status; 56 | DbAddr dbobject; 57 | } MVCCResult; 58 | 59 | 60 | uint32_t hashVal(uint8_t* src, uint32_t len); 61 | 62 | 63 | // catalog concurrency parameters 64 | 65 | typedef struct { 66 | TxnCC isolation; 67 | } CcMethod; 68 | 69 | // Database transactions: DbAddr housed in database ObjId frames 70 | 71 | typedef struct { 72 | Timestamp reader[1]; // txn begin timestamp 73 | Timestamp commit[1]; // txn commit timestamp 74 | Timestamp pstamp[1]; // predecessor high water 75 | Timestamp sstamp[1]; // successor low water 76 | DbAddr rdrFrame[1]; // head read set DocIds 
77 | DbAddr rdrFirst[1]; // first read set DocIds 78 | DbAddr wrtFrame[1]; // head write set DocIds 79 | DbAddr wrtFirst[1]; // first write set DocIds 80 | ObjId nextTxn, txnId; // nested txn next, this txn 81 | ObjId hndlId; // current docStore handle 82 | uint32_t wrtCount; // size of write set 83 | uint32_t txnVer; // txn slot sequence number 84 | union { 85 | struct { 86 | uint8_t latch[1]; 87 | uint8_t isolation:3; 88 | uint8_t state : 5; 89 | uint16_t tsClnt; // timestamp generator slot 90 | }; 91 | TxnCC disp : 8; // display isolation mode in debugger 92 | }; 93 | } Txn; 94 | 95 | typedef struct { 96 | ObjId txnId; 97 | DbAddr deDup[1]; // de-duplication set membership 98 | DbHandle hndl[1]; // docStore DbHandle 99 | Timestamp reader[1];// read timestamp 100 | TxnCC isolation; // txn isolation mode 101 | } DbMvcc; 102 | 103 | Txn* mvcc_fetchTxn(ObjId txnId); 104 | void mvcc_releaseTxn(Txn* txn); 105 | 106 | #include "mvcc_dbapi.h" 107 | #include "mvcc_dbdoc.h" 108 | #include "mvcc_dbtxn.h" 109 | #include "mvcc_dbidx.h" 110 | #include "mvcc_dbssn.h" 111 | 112 | -------------------------------------------------------------------------------- /mvcc_dbapi.c: -------------------------------------------------------------------------------- 1 | // Define the document API for mvcc and ACID transactions 2 | // implemented for the database project. 
//	Hash len bytes of src into 32 bits.
//
//	The buffer is consumed from its end toward its start: four bytes
//	at a time while at least four remain, then one byte at a time.
//	Each step multiplies the running value by a, and a by b.
//	Returns 0 for len == 0 (src is not touched).

uint32_t hashVal(uint8_t *src, uint32_t len) {
	uint32_t val = 0;
	uint32_t b = 378551;
	uint32_t a = 63689;

	while (len) {
		val *= a;
		a *= b;

		if (len < sizeof(uint32_t))
			val += src[--len];
		else {
			//	gather the word byte-by-byte in host order:
			//	same value as reading a uint32_t at src + len,
			//	without requiring src + len to be 4-byte aligned

			union {
				uint32_t word;
				uint8_t bytes[sizeof(uint32_t)];
			} chunk;

			len -= 4;
			chunk.bytes[0] = src[len];
			chunk.bytes[1] = src[len + 1];
			chunk.bytes[2] = src[len + 2];
			chunk.bytes[3] = src[len + 3];
			val += chunk.word;
		}
	}

	return val;
}
document header 40 | DbAddr prevAddr; // previous document set 41 | DbAddr nextAddr; // next newer document version set 42 | uint32_t commitVer; // offset of most recent committed version 43 | uint32_t pendingVer; // offset of pending uncommitted version 44 | ObjId txnId; // txn slot sequence number 45 | uint64_t verNo; // next version number, increment on assignment 46 | DocAction op; // pending document action/committing bit 47 | DocId docId; // pending uncommitted txn ID 48 | } Doc; 49 | 50 | 51 | MVCCResult mvcc_beginTxn(Params* params, ObjId nestedTxn); 52 | MVCCResult mvcc_rollbackTxn(Params* params, uint64_t txnBits); 53 | MVCCResult mvcc_commitTxn(Txn *txn, Params* params); 54 | 55 | MVCCResult mvcc_installNewVersion(Handle *docHndl, uint32_t valSize, DocId* docId, uint16_t keyCnt); 56 | 57 | MVCCResult mvcc_writeDoc(Txn *txn, DbHandle dbHndl, DocId *docId, uint32_t valSize, uint8_t *valBytes, uint16_t keyCnt); 58 | 59 | MVCCResult mvcc_processKey(DbHandle hndl, DbHandle hndlIdx, Ver* prevVer, Ver* ver, DbKeyValue *srcKey); 60 | 61 | // MVCCResult mvcc_OpenDocumentInterface (DbHandle dbHndl[1], char *name, uint32_t len, Params *params); -------------------------------------------------------------------------------- /mvcc_dbdoc.c: -------------------------------------------------------------------------------- 1 | // mvcc document implementation for database project 2 | 3 | #include "mvcc.h" 4 | 5 | // allocate docStore power-of-two memory 6 | 7 | uint64_t mvcc_allocDocStore(Handle* docHndl, uint32_t size, bool zeroit) { 8 | DbMap *map = MapAddr(docHndl); 9 | 10 | return allocObj(map, map->arena->blkFrame, -1, size, zeroit); 11 | } 12 | 13 | // allocate and install new document versions 14 | // docSlot points at slot containing the DbAddr of the doc 15 | 16 | MVCCResult mvcc_chainNewDoc(Handle* docHndl, DbAddr* docSlot, uint32_t valSize, uint32_t keyCnt) { 17 | MVCCResult result = { .value = 0, .count = 0, .objType = objDoc, .status = DB_OK}; 18 | uint32_t 
verSize, stopSize, rawSize, blkSize; 19 | DbMap* docMap = MapAddr(docHndl); 20 | DocStore* docStore; 21 | Doc *doc, *prevDoc; 22 | DbAddr docAddr; 23 | Ver* ver; 24 | 25 | stopSize = sizeof(ver->stop); 26 | stopSize += 15; 27 | stopSize &= -16; 28 | 29 | docStore = (DocStore *)(docMap + 1); 30 | blkSize = docStore->blkSize; 31 | 32 | if(blkSize == 0 ) 33 | blkSize = 4096; 34 | 35 | verSize = sizeof(Ver) + keyCnt * sizeof(DbAddr) + valSize; 36 | verSize += stopSize + sizeof(Doc); 37 | 38 | while( verSize > blkSize ) 39 | blkSize *= 2; 40 | 41 | // set up the document 42 | 43 | if( !docSlot ) 44 | return result.status = DB_ERROR_badobjslot, result; 45 | 46 | if (docSlot->addr) 47 | prevDoc = getObj(docMap, *docSlot); 48 | else 49 | prevDoc = NULL; 50 | 51 | // allocate space in docStore for the new Document or version 52 | 53 | if ((docAddr.bits = mvcc_allocDocStore(docHndl, blkSize, false))) 54 | doc = getObj(docMap, docAddr); 55 | else 56 | return result.status = DB_ERROR_outofmemory, result; 57 | 58 | rawSize = db_rawSize(docAddr); 59 | 60 | // init new document block 61 | 62 | memset(doc, 0, sizeof(Doc)); 63 | doc->commitVer = rawSize - stopSize; 64 | doc->prevAddr = *docSlot; 65 | 66 | doc->dbDoc->docId->bits = docSlot->bits; 67 | doc->dbDoc->docType = VerMvcc; 68 | 69 | // fill-in stopper version (verSize == 0) at end of document 70 | 71 | ver = (Ver*)(doc->dbDoc->base + rawSize - stopSize); 72 | ver->stop->offset = rawSize - stopSize; 73 | ver->stop->verSize = 0; 74 | 75 | // install locked new head of version chain 76 | 77 | docSlot->bits = ADDR_MUTEX_SET | docAddr.bits; 78 | result.object = doc; 79 | return result; 80 | } 81 | 82 | // process new document version keys 83 | 84 | MVCCResult mvcc_ProcessKeys(DbHandle hndl, DbHandle hndlIdx, Ver* prevVer, Ver* ver, DocId docId, MVCCKeyValue *srcKey, uint16_t keyCnt) { 85 | 86 | Handle *docHndl = bindHandle(hndl, Hndl_docStore); 87 | 88 | Handle *idxHndl = bindHandle(hndlIdx, Hndl_anyIdx); 89 | MVCCResult result 
= { 90 | .value = 0, .count = 0, .objType = 0, .status = DB_OK}; 91 | uint32_t size = sizeof(DbKeyValue); 92 | DbMap* docMap = MapAddr(docHndl); 93 | DbAddr insKey, addr; 94 | MVCCKeyValue *destKey; 95 | uint32_t hashKey, verSize; 96 | uint8_t *key = getObj(docMap, srcKey->bytes); 97 | int slot; 98 | 99 | if( !docHndl ) 100 | return result.objType = objErr, result.status = DB_ERROR_handleclosed, result; 101 | 102 | docMap = MapAddr(docHndl); 103 | size += srcKey->kv->keyLen; 104 | 105 | hashKey = hashVal(key, srcKey->kv->keyLen - srcKey->suffix); 106 | 107 | // see if this key already indexed 108 | // in previous version 109 | 110 | if (prevVer) { 111 | verSize = sizeof(struct Stop); 112 | 113 | } 114 | 115 | // otherwise install key for delete/update 116 | // and i key into its index 117 | 118 | insKey.bits = mvcc_allocDocStore(docHndl, size + calc64(docId.bits), true); 119 | destKey = getObj(docMap, insKey); 120 | memcpy(destKey, srcKey, size); 121 | destKey->keyHash = hashKey; 122 | 123 | size += store64((uint8_t *)destKey, size, docId.bits); 124 | addr.bits = insKey.bits; 125 | 126 | if(( slot = vectorPush(docMap, ver->keys, addr))) 127 | destKey->vecIdx = slot; 128 | else 129 | return result.status = DB_ERROR_outofmemory, result.objType = objErr, result; 130 | 131 | result.status = mvcc_insertKeyValue(idxHndl, destKey); 132 | return result; 133 | } 134 | 135 | // allocates and installs a new 136 | // document version by fitting it between the previous 137 | // version base and the sizeof the Doc structure plus the 138 | // size of the key vector of document version keys. 
139 | 140 | MVCCResult mvcc_installNewVersion(Handle *docHndl, uint32_t valSize, DbAddr *docSlot, uint16_t keyCnt) { 141 | MVCCResult result = { 142 | .value = 0, .count = 0, .objType = objDoc, .object = 0, .status = DB_OK}; 143 | DbMap* docMap = MapAddr(docHndl); 144 | Doc *prevDoc = NULL; 145 | uint32_t verSize; 146 | DocId docId[1]; 147 | Ver* ver; 148 | Doc* doc; 149 | 150 | if (docSlot == NULL) 151 | return result.status = DB_ERROR_badhandle, result; 152 | 153 | if( docId->bits = docSlot->bits ) 154 | prevDoc = getObj(docMap, *docSlot); 155 | 156 | verSize = sizeof(Ver) + keyCnt * sizeof(DbAddr) + valSize; 157 | 158 | // allocate space in docStore for a new mvcc document 159 | // or add new version to existing Document 160 | 161 | if (prevDoc == NULL || sizeof(Doc) + verSize > prevDoc->commitVer ) 162 | result = mvcc_chainNewDoc(docHndl, docSlot, valSize, keyCnt); 163 | 164 | if( result.status != DB_ERROR_outofmemory) 165 | return result; 166 | 167 | // new version fits below existing or new space 168 | 169 | result =(MVCCResult) { 170 | .value = 0, .count = 0, .objType = 0, .status = DB_OK}; 171 | 172 | // configure pending version under commit (committed) 173 | // install new head of version chain -- 174 | // subtract verSize from commitVer 175 | // and store in pendingVer 176 | 177 | doc = getObj(docMap, *docSlot); 178 | doc->pendingVer = doc->commitVer - verSize; 179 | 180 | ver = (Ver*)(doc->dbDoc->base + doc->pendingVer); 181 | ver->verNo = ++doc->verNo; 182 | 183 | ver->stop->offset = doc->pendingVer; 184 | ver->stop->verSize = verSize; 185 | 186 | ver = (Ver*)(doc->dbDoc->base + doc->commitVer); 187 | ver->keys->vecMax = keyCnt; 188 | ver->keys->vecLen = 0; 189 | 190 | // install version offset in doc 191 | 192 | doc->op = OpWrt; 193 | 194 | result.object = doc; 195 | return result; 196 | } 197 | 198 | // write new document version into collection 199 | // pass docId slot address with existing document dbaddr 200 | // or zero to allocate an Id for the 
new document. 201 | 202 | // the docId slot remains locked until each of the keys are 203 | // installed as either no change, or new. 204 | 205 | MVCCResult mvcc_writeDoc(Txn *txn, DbHandle dbHndl, DocId *docId, uint32_t valSize, uint8_t *valBytes, uint16_t keyCnt) { 206 | MVCCResult result = { 207 | .value = 0, .count = 0, .objType = 0, .status = DB_OK }; 208 | Handle *docHndl = bindHandle(dbHndl, Hndl_docStore); 209 | DbMap *docMap = MapAddr(docHndl); 210 | Doc *doc; 211 | DbAddr *docSlot; 212 | 213 | if (!docId->bits) 214 | docId->bits = allocObjId(docMap); 215 | 216 | docSlot = fetchIdSlot(docMap, *docId); 217 | 218 | if(docSlot->bits) 219 | result = mvcc_installNewVersion(docHndl, valSize, docSlot, keyCnt); 220 | else 221 | result = mvcc_chainNewDoc(docHndl, docSlot, valSize, keyCnt); 222 | 223 | if (result.status == DB_OK) { 224 | doc = result.object; 225 | memcpy(doc->dbDoc->base, valBytes, valSize); 226 | result = mvcc_addDocWrToTxn(txn, docHndl, doc); 227 | } 228 | 229 | return result; 230 | } -------------------------------------------------------------------------------- /mvcc_dbdoc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef uint32_t (*FillFcn)(uint8_t* rec, uint32_t size, void *fillOpt); 4 | typedef int (*Intfcnp)(); 5 | 6 | 7 | 8 | 9 | MVCCResult mvcc_findCursorVer(DbCursor* dbCursor, DbMap* map, DbMvcc* dbMvcc, Ver* ver); 10 | MVCCResult mvcc_addDocRdToTxn(Txn* txn, Handle *docHndl, Ver* ver); 11 | MVCCResult mvcc_addDocWrToTxn(Txn* txn, Handle *docHndl, Doc* doc); 12 | 13 | Ver * mvcc_getVersion(DbMap *map, Doc *doc, uint64_t verNo); 14 | uint64_t mvcc_allocDocStore(Handle* docHndl, uint32_t size, bool zeroit); 15 | DbStatus mvcc_installKeys(Handle* idxHndls[1], Ver* ver); 16 | DbStatus mvcc_removeKeys(Handle* idxHndls[1], Ver* ver, DbMmbr* mmbr, DbAddr* slot); 17 | -------------------------------------------------------------------------------- /mvcc_dbidx.c: 
-------------------------------------------------------------------------------- 1 | // mvcc_dbidx.c 2 | 3 | #include "mvcc.h" 4 | 5 | DbStatus mvcc_insertKeyValue(Handle *idxHndl, MVCCKeyValue *keyValue) { 6 | 7 | return DB_OK; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /mvcc_dbidx.h: -------------------------------------------------------------------------------- 1 | // index key mgmt 2 | 3 | #pragma once 4 | 5 | // version Keys stored in docStore 6 | // and inserted into the index vector 7 | 8 | typedef struct { 9 | DbKeyValue kv[1]; 10 | DbAddr bytes; 11 | uint32_t refCnt[1]; 12 | uint16_t vecIdx; // index in document key vector 13 | uint16_t suffix; // number of suffix bytes 14 | uint64_t keyHash; // used by MVCC if key changed 15 | } MVCCKeyValue; 16 | 17 | uint64_t mvcc_allocDocStore(Handle* docHndl, uint32_t size, bool zeroit); 18 | DbStatus mvcc_insertKeyValue(Handle *idxHndl, MVCCKeyValue *kv); -------------------------------------------------------------------------------- /mvcc_dbssn.h: -------------------------------------------------------------------------------- 1 | //Serial Safety Net 2 | 3 | typedef enum { 4 | SsnReaders, 5 | SsnCommit 6 | } SsnValidate; 7 | 8 | Ver *mvcc_getVersion(DbMap *dbMap, Doc *doc, uint64_t verNo); 9 | 10 | DbStatus mvcc_scan1(Txn *txn); 11 | DbStatus mvcc_scan2(Txn *txn); 12 | DbStatus mvcc_scan3(Txn *txn); 13 | DbStatus mvcc_scan4(Txn *txn); 14 | 15 | DbMap* txnMap, memMap[1]; 16 | -------------------------------------------------------------------------------- /mvcc_dbssn1.c: -------------------------------------------------------------------------------- 1 | // scan txn steps 2 | 3 | // implement transactions 4 | 5 | #include "mvcc.h" 6 | 7 | DbStatus mvcc_scan1(Txn* txn) { 8 | DbAddr next, finalAddr; 9 | DbAddr *docSlot; 10 | bool result = true; 11 | DbMap *docMap = NULL; 12 | Handle *docHndl = NULL; 13 | uint64_t verNo = 0; 14 | DocId docId; 15 | ObjId objId; 16 | 
Frame *frame; 17 | Doc *doc; 18 | Ver *ver; 19 | int idx; 20 | 21 | //for v in t.writes: # finalize \eta(T) 22 | //t.pstamp = max(t.pstamp, v.pstamp 23 | 24 | 25 | // evaluate writes of versions by this txn 26 | // that were later overwritten concurrently with our 27 | // transaction and finalize pi(txn) 28 | 29 | // precommit 30 | // scan1 wrt start 31 | 32 | if ((next.bits = txn->wrtFirst->bits)) 33 | finalAddr.bits = txn->wrtFrame->bits; 34 | else { 35 | next.bits = txn->wrtFrame->bits; 36 | finalAddr.bits = 0; 37 | } 38 | 39 | while (next.addr) { 40 | frame = getObj(txnMap, next); 41 | 42 | for (idx = 0; idx < next.nslot; idx++) { 43 | objId.bits = frame->slots[idx]; 44 | 45 | switch (objId.step) { 46 | default: 47 | continue; 48 | 49 | case TxnMap: 50 | if (docHndl) 51 | releaseHandle(docHndl); 52 | 53 | if(docHndl = fetchIdSlot(hndlMap, objId)) { 54 | docMap = MapAddr(docHndl); 55 | continue; 56 | } 57 | 58 | return DB_ERROR_badtxnstep; 59 | 60 | case TxnVer: 61 | verNo = objId.verNo; 62 | continue; 63 | 64 | case TxnWrt: 65 | docId.bits = objId.bits; 66 | docSlot = fetchIdSlot(docMap, docId); 67 | doc = getObj(docMap, *docSlot); 68 | break; 69 | } 70 | 71 | // for v in t.writes: # finalize \eta(T) 72 | // t.pstamp = max(t.pstamp, v.pstamp) 73 | 74 | // ignore a read of our own new version 75 | 76 | if (doc->op == OpWrt) 77 | if (doc->txnId.bits == txn->txnId.bits) 78 | continue; 79 | 80 | // is there another committed version 81 | // after our version? Was there another 82 | // version committed after our read? 83 | 84 | if (verNo + 1 < doc->verNo) 85 | ver = mvcc_getVersion(docMap, doc, verNo + 1); 86 | else 87 | continue; 88 | 89 | // is our read version overwritten yet? 
check 90 | // if it was committed with higher timestamp 91 | 92 | waitNonZero64(ver->commit->lowHi + 1); 93 | 94 | if (timestampCmp(txn->commit, ver->commit, 0, 0) < 0) continue; 95 | 96 | timestampCAX(txn->sstamp, ver->sstamp, 1, 'r', 'b'); 97 | continue; 98 | } 99 | 100 | if (!(next.bits = frame->prev.bits)) { 101 | next.bits = finalAddr.bits; 102 | finalAddr.bits = 0; 103 | } 104 | } 105 | 106 | if (docHndl) releaseHandle(docHndl); 107 | return DB_OK; 108 | } -------------------------------------------------------------------------------- /mvcc_dbssn2.c: -------------------------------------------------------------------------------- 1 | // scan txn steps 2 | 3 | // implement transactions 4 | 5 | #include "mvcc.h" 6 | 7 | DbStatus mvcc_scan2(Txn *txn) { 8 | DbAddr next, addr, finalAddr; 9 | DbAddr *docSlot; 10 | Ver *ver; 11 | bool result = true; 12 | DbMap *docMap = NULL; 13 | Handle *docHndl = NULL; 14 | uint64_t verNo = 0; 15 | ObjId docId; 16 | ObjId objId; 17 | Frame *frame; 18 | Doc *doc; 19 | int idx; 20 | 21 | // evaluate writes by this txn 22 | // to finalize eta(tn) 23 | 24 | if ((next.bits = txn->wrtFirst->bits)) 25 | finalAddr.bits = txn->wrtFrame->bits; 26 | else { 27 | next.bits = txn->wrtFrame->bits; 28 | finalAddr.bits = 0; 29 | } 30 | 31 | while ((addr.bits = next.bits)) 32 | { 33 | frame = getObj(txnMap, addr); 34 | 35 | for (idx = 0; idx < addr.nslot; idx++) 36 | { 37 | objId.bits = frame->slots[idx]; 38 | 39 | switch (objId.step) { 40 | default: 41 | continue; 42 | 43 | case TxnMap: 44 | if (docHndl) 45 | releaseHandle(docHndl); 46 | 47 | if(docHndl = fetchIdSlot(hndlMap, objId)) { 48 | docMap = MapAddr(docHndl); 49 | continue; 50 | } 51 | 52 | return DB_ERROR_badtxnstep; 53 | 54 | case TxnVer: 55 | verNo = objId.verNo; 56 | continue; 57 | 58 | case TxnWrt: 59 | docId.bits = objId.bits; 60 | docSlot = fetchIdSlot(docMap, docId); 61 | doc = getObj(docMap, *docSlot); 62 | break; 63 | } 64 | 65 | // is this a read of our own new version? 
66 | 67 | if (doc->op == OpWrt) 68 | if (doc->txnId.bits == txn->txnId.bits) 69 | continue; 70 | 71 | // is there another committed version 72 | // after our version? Was there another 73 | // version committed after our read? 74 | 75 | if (verNo + 1 < doc->verNo) 76 | ver = mvcc_getVersion(docMap, doc, verNo + 1); 77 | else 78 | continue; 79 | 80 | // is our read version overwritten yet? check 81 | // if it was committed with higher timestamp 82 | 83 | waitNonZero64(ver->commit->lowHi + 1); 84 | 85 | if (timestampCmp(txn->commit, ver->commit, 0, 0) > 0) 86 | timestampCAX(txn->sstamp, ver->sstamp, 1, 'r', 'b'); 87 | 88 | continue; 89 | } 90 | 91 | if (!(next.bits = frame->prev.bits)) { 92 | next.bits = finalAddr.bits; 93 | finalAddr.bits = 0; 94 | } 95 | } 96 | 97 | if (docHndl) 98 | releaseHandle(docHndl); 99 | 100 | return DB_OK; 101 | } 102 | -------------------------------------------------------------------------------- /mvcc_dbssn3.c: -------------------------------------------------------------------------------- 1 | // scan txn steps 2 | 3 | // implement transactions 4 | 5 | #include "mvcc.h" 6 | 7 | DbStatus mvcc_scan3(Txn *txn) { 8 | DbAddr addr, next, finalAddr; 9 | Timestamp pstamp[1]; 10 | Ver *ver; 11 | bool result = true; 12 | DbMap *docMap = NULL; 13 | Handle *docHndl = NULL; 14 | uint64_t verNo; 15 | DocId docId; 16 | ObjId objId; 17 | Frame *frame; 18 | Doc *doc; 19 | int idx; 20 | 21 | // final pre-commit step 22 | // exclusion test 23 | 24 | // # finalize \pi(T) 25 | // t.sstamp = min(t.sstamp, t.cstamp) 26 | // for v in t.reads: 27 | // t.sstamp = min(t.sstamp, v.sstamp) 28 | // ssn_check_exclusion(t) 29 | // t.status = COMMITTED 30 | 31 | result = timestampCmp(txn->sstamp, txn->pstamp, 0, 0) <= 0 ? 
false : true; 32 | 33 | if (result) 34 | txn->state = TxnCommitted; 35 | else 36 | txn->state = TxnRollback; 37 | 38 | // finalize txn->sstamp from the readSet 39 | // Post Commit 40 | 41 | // process the reader pstamp from our commit time 42 | // return reader set Frames. 43 | 44 | 45 | if ((next.bits = txn->rdrFirst->bits)) 46 | finalAddr.bits = txn->rdrFrame->bits; 47 | else { 48 | next.bits = txn->rdrFrame->bits; 49 | finalAddr.bits = 0; 50 | } 51 | 52 | while ((addr.bits = next.bits)) { 53 | frame = getObj(txnMap, addr); 54 | 55 | for (idx = 0; idx < addr.nslot; idx++) { 56 | objId.bits = frame->slots[idx]; 57 | 58 | switch (objId.step) { 59 | case TxnRaw: 60 | continue; 61 | 62 | default: 63 | continue; 64 | 65 | case TxnMap: 66 | if (docHndl) releaseHandle(docHndl); 67 | docHndl = fetchIdSlot(hndlMap, objId); 68 | docMap = MapAddr(docHndl); 69 | continue; 70 | 71 | case TxnRdr: 72 | docId.bits = objId.bits; 73 | break; 74 | } 75 | 76 | DbAddr *docSlot = fetchIdSlot(docMap, docId); 77 | verNo = frame->slots[idx]; 78 | 79 | // if we also write this read-set mmbr, skip it 80 | 81 | doc = getObj(docMap, *docSlot); 82 | 83 | if (doc->verNo == verNo && (doc->op & OpCommit)) continue; 84 | 85 | ver = mvcc_getVersion(docMap, doc, verNo); 86 | 87 | // keep largest ver pstamp 88 | 89 | timestampInstall(pstamp, ver->pstamp, 's', 'b'); 90 | 91 | while (timestampCmp(pstamp, txn->commit, 0, 0) > 0) 92 | if (atomicCAS128(ver->pstamp->tsBits, pstamp->tsBits, txn->commit->tsBits)) 93 | break; 94 | else 95 | timestampInstall(pstamp, ver->pstamp, 's', 'b'); 96 | } 97 | 98 | if (!(next.bits = frame->prev.bits)) { 99 | next.bits = finalAddr.bits; 100 | finalAddr.bits = 0; 101 | } 102 | 103 | returnFreeFrame(txnMap, addr); 104 | } 105 | 106 | if (docHndl) releaseHandle(docHndl); 107 | 108 | docHndl = NULL; 109 | return DB_OK; 110 | } 111 | // scan 3 rdr end 112 | -------------------------------------------------------------------------------- /mvcc_dbssn4.c: 
-------------------------------------------------------------------------------- 1 | // scan txn steps 2 | 3 | // implement transactions 4 | 5 | #include "mvcc.h" 6 | 7 | DbStatus mvcc_scan4(Txn *txn) { 8 | DbAddr *slot, addr, next, finalAddr; 9 | Ver *ver, *prevVer; 10 | bool result = true; 11 | DbMap *docMap = NULL; 12 | Handle *docHndl = NULL; 13 | uint64_t verNo; 14 | ObjId objId; 15 | Frame *frame; 16 | Doc *doc; 17 | int idx; 18 | 19 | // finally, commit wrt set version 20 | 21 | if ((next.bits = txn->wrtFirst->bits)) 22 | finalAddr.bits = txn->wrtFrame->bits; 23 | else { 24 | next.bits = txn->wrtFrame->bits; 25 | finalAddr.bits = 0; 26 | } 27 | 28 | // pre-commit 29 | // Scan transaction reads 30 | 31 | while ((addr.bits = next.bits)) { 32 | frame = getObj(txnMap, addr); 33 | 34 | for (idx = 0; idx < addr.nslot; idx++) { 35 | objId.bits = frame->slots[idx]; 36 | 37 | switch (objId.step) { 38 | case TxnRaw: 39 | continue; 40 | 41 | case TxnMap: 42 | if (docHndl) releaseHandle(docHndl); 43 | docHndl = fetchIdSlot(hndlMap, objId); 44 | docMap = MapAddr(docHndl); 45 | continue; 46 | 47 | default: 48 | continue; 49 | 50 | case TxnWrt: 51 | verNo = frame->slots[idx]; 52 | slot = fetchIdSlot(docMap, objId); 53 | lockLatch(slot->latch); 54 | 55 | doc = getObj(docMap, *slot); 56 | if (doc->verNo == verNo) break; 57 | 58 | if (doc->op & OpCommit) break; 59 | 60 | unlockLatch(slot->latch); 61 | continue; 62 | } 63 | 64 | // find previous version 65 | 66 | prevVer = (Ver *)(doc->dbDoc->base + doc->commitVer); 67 | ver = (Ver *)(doc->dbDoc->base + doc->pendingVer); 68 | 69 | if (doc->commitVer) timestampInstall(prevVer->sstamp, txn->commit, 'd', 'd'); 70 | 71 | timestampInstall(ver->commit, txn->commit, 'd', 'd'); 72 | timestampInstall(ver->pstamp, txn->commit, 'd', 'd'); 73 | ver->sstamp->lowHi[0] = 0; 74 | ver->sstamp->lowHi[1] = ~0ULL; 75 | doc->txnId.bits = 0; 76 | doc->op = TxnDone; 77 | 78 | unlockLatch(slot->latch); 79 | continue; 80 | } 81 | 82 | if (!(next.bits 
= frame->prev.bits)) { 83 | next.bits = finalAddr.bits; 84 | finalAddr.bits = 0; 85 | } 86 | 87 | returnFreeFrame(txnMap, addr); 88 | } 89 | if (docHndl) 90 | releaseHandle(docHndl); 91 | 92 | return DB_OK; 93 | } -------------------------------------------------------------------------------- /mvcc_dbtxn.h: -------------------------------------------------------------------------------- 1 | // mvcc transactions 2 | 3 | #pragma once 4 | 5 | #include "base64.h" 6 | #include "db.h" 7 | #include "db_api.h" 8 | #include "Hi-Performance-Timestamps/timestamps.h" 9 | 10 | // MVCC and TXN definitions for DATABASE project 11 | 12 | DbMap* txnMap, memMap[1]; 13 | 14 | DbStatus mvcc_scan1(Txn* txn); 15 | DbStatus mvcc_scan2(Txn* txn); 16 | DbStatus mvcc_scan3(Txn* txn); 17 | DbStatus mvcc_scan4(Txn* txn); -------------------------------------------------------------------------------- /oldbtree1.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "base64.h" 4 | #include "db_index.h" 5 | #include "db_malloc.h" 6 | #include "db_error.h" 7 | #include "db_map.h" 8 | #include "db_api.h" 9 | #include "../rwlock/readerwriter.h" 10 | 11 | // BTree configuration and options 12 | 13 | // #define *+ (2 + 7 + 1) 14 | #define Btree1_maxbits 29 // maximum page size in bits 15 | #define Btree1_minbits 9 // minimum page size in bits 16 | #define Btree1_minpage (1 << Btree1_minbits) // minimum page size 17 | #define Btree1_maxpage (1 << Btree1_maxbits) // maximum page size 18 | #define Btree1_keylenbits (15) 19 | #define Btree1_maxkey (1 << Btree1_keybits) // maximum key length 20 | 21 | // There are four lock types for each node in three independent sets: 22 | // 1. (set 1) ReadLock: Sharable. Read the node. Incompatible with WriteLock. 23 | // 2. (set 1) WriteLock: Exclusive. Modify the node. Incompatible with ReadLock and other WriteLocks. 24 | // 3. (set 2) ParentModification: Exclusive. Change the node's parent keys. 
Incompatible with another ParentModification. 25 | // 4. (set 3) LinkModification: Exclusive. Update of a node's left link is underway. Incompatible with another LinkModification. 26 | 27 | typedef enum { 28 | Btree1_lockRead = 1, 29 | Btree1_lockWrite = 2, 30 | Btree1_lockParent = 4, 31 | Btree1_lockLink = 8 32 | } Btree1Lock; 33 | 34 | typedef ObjId PageId; 35 | 36 | // types of btree pages/allocations 37 | 38 | typedef enum{ 39 | Btree1_rootPage = 3, 40 | Btree1_interior, 41 | Btree1_leafPage, 42 | MAXBtree1Type 43 | } Btree1PageType; 44 | 45 | // page address 46 | 47 | // Btree1Index global data on disk after Arena 48 | // Global Index data structure after DbArena object 49 | 50 | typedef struct { 51 | DbIndex dbIndex[1]; 52 | uint32_t pageSize; 53 | uint32_t pageBits; 54 | uint32_t leafXtra; 55 | uint32_t librarianDensity;// 2 == every other key 56 | PageId root; 57 | PageId left; // leftmost page level 0 58 | PageId right; // rightmost page lvl 0 59 | } Btree1Index; 60 | 61 | // Btree page layout 62 | 63 | typedef struct { 64 | RWLock readwr[1]; // read/write access lock 65 | RWLock parent[1]; // posting of fence key 66 | RWLock link[1]; // left link update 67 | } LatchSet; 68 | 69 | // The page structure is immediately 70 | // followed by an array of the key slots 71 | // and key strings on this page, allocated top-down 72 | 73 | typedef struct { 74 | LatchSet latch[1]; // latches for this page 75 | uint32_t cnt; // count of keys in page 76 | uint32_t act; // count of active keys 77 | uint32_t min; // next page key end offset 78 | uint32_t garbage; // page garbage in bytes 79 | Btree1PageType type:4; 80 | uint8_t lvl:4; // level of page in btree 81 | uint8_t free:1; // page is unused on free chain 82 | uint8_t kill:1; // page is being deleted 83 | PageId right; // page to right 84 | PageId left; // page to left 85 | PageId self; // current page no 86 | } Btree1Page; 87 | 88 | typedef struct { 89 | uint8_t *keyVal; 90 | uint32_t keyLen; 91 | int64_t *suffix; 
92 | uint32_t suffixCnt; 93 | Btree1Page *page; // current page Addr 94 | uint32_t slotIdx; // slot on page 95 | } Btree1Set; 96 | 97 | // Page key slot definition. 98 | 99 | // Keys are marked dead, but remain on the page until 100 | // it cleanup is called. 101 | 102 | // Slot types 103 | 104 | // In addition to the Unique keys that occupy slots 105 | // there are Librarian slots in the key slot array. 106 | 107 | // The Librarian slots are dead keys that 108 | // serve as filler, available to add new keys. 109 | 110 | typedef enum { 111 | Btree1_indexed, // key was indexed 112 | Btree1_deleted, // key was deleted 113 | Btree1_librarian, // librarian slot 114 | Btree1_fenceKey, // fence key for page 115 | Btree1_stopper // stopper slot 116 | } Btree1SlotType; 117 | 118 | typedef union { 119 | uint64_t bits[2]; 120 | 121 | struct { 122 | uint32_t off : 29; // key bytes offset 123 | uint32_t type : 2; // type of key slot 124 | uint32_t dead : 1; // dead/librarian slot 125 | uint32_t length; // key length 126 | }; 127 | union { 128 | PageId childId; // page Id of next level to leaf 129 | ObjId payLoad; 130 | }; 131 | } Btree1Slot; 132 | 133 | typedef struct { 134 | DbCursor base[1]; // base object 135 | uint32_t leafSize; 136 | uint32_t slotIdx; // cursor position index 137 | Btree1Page page[]; // cursor position page buffer 138 | } Btree1Cursor; 139 | 140 | // access macros 141 | 142 | #define slotptr(page, slot) (((Btree1Slot *)(page+1)) + (((int)slot)-1)) 143 | 144 | #define btree1index(map) ((Btree1Index *)(map->arena + 1)) 145 | 146 | #define keyaddr(page, off) ((uint8_t *)((uint8_t *)(page) + off)) 147 | #define keyptr(page, slot) ((uint8_t *)((uint8_t *)(page) + slotptr(page, slot)->off)) 148 | 149 | // btree1 implementation 150 | 151 | DbStatus btree1NewCursor(DbCursor *cursor, DbMap *map); 152 | DbStatus btree1ReturnCursor(DbCursor *dbCursor, DbMap *map); 153 | 154 | DbStatus btree1LeftKey(DbCursor *cursor, DbMap *map); 155 | DbStatus 
btree1RightKey(DbCursor *cursor, DbMap *map); 156 | 157 | DbStatus btree1FindKey(DbCursor *dbCursor, DbMap *map, void *key, uint32_t keylen, bool onlyOne); 158 | DbStatus btree1NextKey (DbCursor *cursor, DbMap *map); 159 | DbStatus btree1PrevKey (DbCursor *cursor, DbMap *map); 160 | 161 | DbStatus btree1StoreSlot (Handle *hndl, uint8_t *key, uint32_t keyLen, int64_t *values, uint32_t valueCnt); 162 | DbStatus btree1Init(Handle *hndl, Params *params); 163 | DbStatus btree1InsertKey(Handle *hndl, uint8_t *key, uint32_t keyLen, uint32_t sfxLen, uint8_t lvl, Btree1SlotType type); 164 | DbStatus btree1DeleteKey(Handle *hndl, void *key, uint32_t keyLen); 165 | 166 | DbStatus btree1LoadPage(DbMap *map, Btree1Set *set, Btree1Lock lockMode); 167 | 168 | DbStatus btree1CleanPage(Handle *hndl, Btree1Set *set, uint32_t totKeyLen); 169 | DbStatus btree1SplitPage (Handle *hndl, Btree1Set *set); 170 | DbStatus btree1FixKey (Handle *index, uint8_t *fenceKey, uint64_t prev, uint64_t suffix, uint8_t lvl, bool stopper); 171 | DbStatus btree1InsertSfxKey(Handle *hndl, uint8_t *key, uint32_t keyLen, uint64_t suffix, uint8_t lvl, Btree1SlotType type); 172 | 173 | Btree1Page *btree1NewPage(Handle *index, uint8_t lvl, Btree1PageType type); 174 | void btree1LockPage(Btree1Page *page, Btree1Lock mode); 175 | void btree1UnlockPage(Btree1Page *page, Btree1Lock mode); 176 | int btree1KeyCmp (uint8_t *key1, uint8_t *key2, uint32_t len2); 177 | -------------------------------------------------------------------------------- /testfiles/test1: -------------------------------------------------------------------------------- 1 | AsfAGHM5om 00000000000000000000000000000000: 0000222200002222000022220000222200002222000000001111 2 | ~sHd0jDv6X 00000000000000000000000000000001: 77779999444488885555CCCC777755555555BBBB666644446666 3 | uI^EYm8s=| 00000000000000000000000000000002: CCCCFFFF777799995555FFFF11112222999988884444DDDDFFFF 4 | Q)JN)R9z-L 00000000000000000000000000000003: 
FFFF111100000000000066668888BBBB33333333AAAA1111CCCC 5 | o4FoBkqERn 00000000000000000000000000000004: 7777AAAABBBBBBBB22224444444499995555BBBB11118888DDDD 6 | *}-Wz1;TD- 00000000000000000000000000000005: AAAA88883333BBBB888888884444777722227777999900002222 7 | 0fssx}~[oB 00000000000000000000000000000006: FFFF999977774444AAAA7777EEEEDDDDAAAAAAAA99998888BBBB 8 | mz4VCN@a#" 00000000000000000000000000000007: DDDDBBBB1111FFFF2222DDDDFFFFBBBBFFFF6666444477778888 9 | my+=5r7(N| 00000000000000000000000000000008: 22226666CCCC66662222FFFF0000EEEE11118888444455559999 10 | 5HA\z%qt{% 00000000000000000000000000000009: 0000AAAA8888FFFF0000888800000000222255551111FFFFEEEE 11 | abc:def 12 | abcde:f 13 | abcd:ef 14 | -------------------------------------------------------------------------------- /testfiles/test1.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO Alpha testing of binary string fields 3 | 4 | del testdb* 5 | 6 | @ECHO expect 13 sorted keys 7 | @ECHO on 8 | 9 | standalone testdb -cmds=w -summary=skn -idxBinary=: testfiles/test1 10 | -------------------------------------------------------------------------------- /testfiles/test1.wsl: -------------------------------------------------------------------------------- 1 | echo Alpha testing of binary string fields 2 | 3 | rm testdb* 2>/dev/null 4 | 5 | echo expect 13 sorted keys 6 | set -v 7 | ./standalone testdb -cmds=w -summary=skn -idxBinary=: -keyLen=10 testfiles/test1 8 | -------------------------------------------------------------------------------- /testfiles/test2.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | 3 | @ECHO "40M random 10 byte keys, single threaded insertions into each index type" 4 | 5 | del testdb* 6 | 7 | @ECHO on 8 | rem standalone.exe testdb -stats -cmds=w -summary=vc -idxType=2 -bits=16 -inMem -noDocs -pennysort 40000000 9 | 10 | standalone.exe testdb -stats -cmds=w -summary=vc 
-idxType=1 -bits=16 -inMem -noDocs -pennysort 40000000 11 | 12 | standalone.exe testdb -stats -cmds=w -summary=vc -idxType=0 -inMem -noDocs -pennysort 40000000 13 | -------------------------------------------------------------------------------- /testfiles/test2.wsl: -------------------------------------------------------------------------------- 1 | echo "40M random 10 byte keys, single threaded insertions into each index type" 2 | 3 | rm testdb* 2>&1 4 | set -v 5 | 6 | #./standalone.exe testdb -stats -cmds=w -summary=vc -idxType=2 -bits=16 -inMem -noDocs -pennysort 40000000 7 | 8 | ./standalone.exe testdb -stats -cmds=w -summary=vc -idxType=1 -bits=16 -inMem -noDocs -pennysort 40000000 9 | 10 | ./standalone.exe testdb -stats -cmds=w -summary=vc -idxType=0 -inMem -noDocs -pennysort 40000000 11 | -------------------------------------------------------------------------------- /testfiles/test3.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "40M random 10 byte keys, multi-threaded insertions into Adaptive Radix Tree index"" 3 | 4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=0 -bits=16 -inMem -noDocs -pennysort -threads=4 10000000 8 | -------------------------------------------------------------------------------- /testfiles/test3.wsl: -------------------------------------------------------------------------------- 1 | echo "40M random 10 byte keys, multi-threaded insertions into Adaptive Radix Tree index" 2 | 3 | rm testdb* 2>/dev/null 4 | set -v 5 | 6 | ./standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=0 -bits=16 -inMem -noDocs -pennysort -threads=4 10000000 7 | -------------------------------------------------------------------------------- /testfiles/test4.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "40M random 10 byte keys, multi-threaded insertions into btree1 index" 3 | 
4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=1 -bits=16 -inMem -noDocs -pennysort -threads=4 10000000 8 | 9 | -------------------------------------------------------------------------------- /testfiles/test4.wsl: -------------------------------------------------------------------------------- 1 | echo "40M random 10 byte keys, multi-threaded insertions into btree1 index" 2 | 3 | rm testdb* 2>/dev/null 4 | set -v 5 | 6 | ./standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=1 -bits=16 -inMem -noDocs -pennysort -threads=4 10000000 7 | 8 | -------------------------------------------------------------------------------- /testfiles/test5.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "10M random 10 byte keys, insertions then searches in ARTree index" 3 | 4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -stats -cmds=wf -summary=vc -idxType=0 -bits=16 -inMem -noDocs -pennysort 10000000 8 | 9 | -------------------------------------------------------------------------------- /testfiles/test5.wsl: -------------------------------------------------------------------------------- 1 | echo "10M random 10 byte keys, insertions then searches in ARTree index" 2 | 3 | rm testdb* 2>/dev/null 4 | set -v 5 | 6 | ./standalone testdb -prng=2 -stats -cmds=wf -summary=vc -idxType=0 -bits=16 -inMem -noDocs -pennysort 10000000 7 | 8 | -------------------------------------------------------------------------------- /testfiles/test6.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "40M random 10 byte keys, multi-threaded insertions then searches in btree1 index" 3 | 4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=0 -bits=16 -noDocs -pennysort -threads=4 10000000 8 | 9 | standalone testdb -prng=2 -stats -cmds=f -summary=vc -idxType=0 -bits=16 
-noDocs -pennysort -threads=4 10000000 10 | 11 | -------------------------------------------------------------------------------- /testfiles/test6.wsl: -------------------------------------------------------------------------------- 1 | echo "40M random 10 byte keys, multi-threaded insertions then searches in btree1 index" 2 | 3 | rm testdb* 2>/dev/null 4 | set -v 5 | 6 | ./standalone testdb -prng=2 -stats -cmds=w -summary=vc -idxType=1 -bits=16 -noDocs -pennysort -threads=4 10000000 7 | 8 | ./standalone testdb -prng=2 -stats -cmds=f -summary=vc -idxType=1 -bits=16 -noDocs -pennysort -threads=4 10000000 9 | 10 | -------------------------------------------------------------------------------- /testfiles/test7.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "40M random 10 byte keys, multi-threaded insertions then subset cursor over btree1 index" 3 | 4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -stats -cmds=w -summary=k -idxType=1 -bits=16 -minKey=aaaA -maxKey=aaaK -noDocs -pennysort -threads=4 10000000 8 | -------------------------------------------------------------------------------- /testfiles/test7.wsl: -------------------------------------------------------------------------------- 1 | echo "40M random 10 byte keys, multi-threaded insertions then subset cursor over btree1 index" 2 | 3 | rm testdb* 2>/dev/null 4 | set -v 5 | 6 | ./standalone testdb -prng=2 -stats -cmds=w -summary=k -idxType=1 -bits=16 -minKey=aaaA -maxKey=aaaK -noDocs -pennysort -threads=4 10000000 7 | -------------------------------------------------------------------------------- /testfiles/test8.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | @ECHO "40M random 10 byte keys, multi-threaded insertions then searches in btree1 index" 3 | 4 | del testdb* 5 | 6 | @ECHO on 7 | standalone testdb -prng=2 -txn=1000 -stats -cmds=w -summary=vc -idxType=0 -bits=16 -noDocs 
-pennysort -threads=4 10000000 8 | 9 | standalone testdb -prng=2 -stats -cmds=f -summary=vc -idxType=0 -bits=16 -noDocs -pennysort -threads=4 10000000 10 | 11 | -------------------------------------------------------------------------------- /vcvars.bat: -------------------------------------------------------------------------------- 1 | %comspec% /k "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" 2 | --------------------------------------------------------------------------------