├── .gitignore ├── LICENSE ├── README.md ├── compaction.go ├── compaction_test.go ├── db.go ├── db_test.go ├── docs └── design.md ├── go.mod ├── go.sum ├── helper.go ├── inmemory.go ├── inmemory_test.go ├── iterator.go ├── iterator_test.go ├── l1policy.go ├── l1policy_test.go ├── level_handler.go ├── manifest.go ├── merge_builder.go ├── merge_builder_test.go ├── options.go ├── table.go ├── table_test.go ├── tree.go └── tree_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2019] [sch00lb0y] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aran 2 | Aran is an embedded key-value store based on the research paper [ 3 | Efficient Key-Value Stores with Ranged Log-Structured Merge Trees](https://ieeexplore.ieee.org/document/8457859) 4 | 5 | 6 | # Usage 7 | 8 | ```go 9 | opts := aran.DefaultOptions() 10 | db, err := aran.New(opts) 11 | if err != nil { 12 | panic(err) 13 | } 14 | defer db.Close() 15 | db.Set([]byte("hello"), []byte("schoolboy")) 16 | val, exist := db.Get([]byte("hello")) 17 | if !exist { 18 | panic("value not exist") 19 | } 20 | fmt.Println(string(val)) 21 | ``` 22 | ### Note 23 | Don't forget to close the db; otherwise some data will be lost. 24 | 25 | # Supported API 26 | 27 | - Get 28 | - Set 29 | 30 | # Milestones 31 | 32 | - Transaction API 33 | - WAL support 34 | - Load balancing small files 35 | 36 | # Contribution 37 | 38 | Don't think too much. Just send a PR if you need a feature or find a bug. 
39 | 40 | Raising an issue is also a kind of help, so feel free to raise an issue if you find any bug. 41 | 42 | # Why another embedded KV store if badger already exists? 43 | 44 | I was bored so I wrote this on my weekend. And, `Go` is awesome. btw I like `Rust` too. 45 | 46 | # Acknowledgments 47 | - Thank you sci-hub for letting me download the paper. Language and money should not be a barrier to gaining knowledge (my opinion). 48 | - Thank you badger for the inspiration. 49 | - Thank you to the authors of [ 50 | Efficient Key-Value Stores with Ranged Log-Structured Merge Trees](https://ieeexplore.ieee.org/document/8457859) for writing a beautiful paper on LSM trees. 51 | # About Me 52 | 53 | I go by the name [schoolboy](https://twitter.com/hi_balaji) and I write `Go` and a little bit of `Rust`. If you're looking for someone to collaborate with on an open source project or to fill a junior dev position, you can DM me at [@hi_balaji](https://twitter.com/hi_balaji). 54 | 55 | # அரண் 56 | 57 | அரண் என்பது புதிய [ஆராய்ச்சியின்படி](https://ieeexplore.ieee.org/document/8457859) எழுதப்பட்ட ஒரு தகவல் சேமிப்பு நிரல். 
58 | 59 | # பயன்பாட்டு முறை 60 | 61 | ```go 62 | opts := aran.DefaultOptions() 63 | db, err := aran.New(opts) 64 | if err != nil { 65 | panic(err) 66 | } 67 | defer db.Close() 68 | db.Set([]byte("வாழ்க"), []byte("மனிதாபிமானம்")) 69 | val, exist := db.Get([]byte("வாழ்க")) 70 | if !exist { 71 | panic("தகவல் கிடைக்கவில்லை") 72 | } 73 | fmt.Println(string(val)) 74 | ``` 75 | ### குறிப்பு 76 | 77 | close அழைக்கவும், இல்லையென்றால் தகவல்களை இழக்க நேரிடும் 78 | 79 | # பங்குஅளிப்பாளர் குறிப்பு 80 | 81 | நீங்க கண்ணா மூடிக்கிட்டு கவலைபடாம PR அனுப்பலாம் 82 | 83 | # ஒப்புகை 84 | 85 | - ஆய்வு கட்டுரையை பதிவு இரக்கம் செய்ய உதவிய sci-hub'கு நன்றி 86 | - எடுத்துக்காட்டாக இருந்த badger'கு நன்றி 87 | - ஆய்வு கட்டுரை எழுதிய ஆசிரியர்க்கு நன்றி [Efficient Key-Value Stores with Ranged Log-Structured Merge Trees](https://ieeexplore.ieee.org/document/8457859) 88 | 89 | # என்னை பற்றி 90 | எனது பெயர் பாலாஜி ஜின்னா. நான் ஒரு பொறியியல் கல்லூரி மாணவன். உங்களுக்கு சந்தேகம் அல்லது ஒரு புதிய நட்பை உருவாக்க விரும்பினால் நீங்கள் எனது கிச்சாக [முகவரிக்கு](https://twitter.com/hi_balaji) செய்தி அனுப்பலாம் -------------------------------------------------------------------------------- /compaction.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "hash/crc32" 17 | 18 | "github.com/sirupsen/logrus" 19 | ) 20 | 21 | func (d *db) handleNotUnion(p compactionPolicy, l0f tableManifest) { 22 | // normal push down 23 | newt := newTable(d.absPath, l0f.Idx) 24 | d.l1handler.addTable(newt, l0f.Idx) 25 | d.l0handler.deleteTable(l0f.Idx) 26 | d.manifest.addl1file(uint32(newt.fileInfo.entries), newt.fileInfo.minRange, newt.fileInfo.maxRange, int(newt.size), l0f.Idx) 27 | d.manifest.deleteL0Table(l0f.Idx) 28 | logrus.Info("compaction: NOT UNION found so simply pushing the l0 file to l1") 29 | } 30 | 31 | func (d *db) handleUnion(p compactionPolicy, l0f tableManifest) { 32 | t1, t2 := newTable(d.absPath, l0f.Idx), newTable(d.absPath, p.tableIDS[0]) 33 | d.mergeTable(t1, t2) 34 | logrus.Infof("compaction: UNION SET found so merged l0 %d with l1 %d, pushed to l1", t1.ID(), t2.ID()) 35 | t1.close() 36 | d.l0handler.deleteTable(t1.ID()) 37 | d.manifest.deleteL0Table(t1.ID()) 38 | removeTable(d.absPath, t1.ID()) 39 | logrus.Infof("compaction: l0 file has been deleted %d", t1.ID()) 40 | t2.close() 41 | d.l1handler.deleteTable(t2.ID()) 42 | d.manifest.deleteL1Table(t2.ID()) 43 | removeTable(d.absPath, t2.ID()) 44 | logrus.Infof("compaction: l1 file has been deleted %d", t2.ID()) 45 | } 46 | 47 | func (d *db) handleOverlapping(p compactionPolicy, l0f tableManifest) { 48 | logrus.Infof("compaction: OVERLAPPING found") 49 | builders := []*mergeTableBuilder{} 50 | // if the the value is not in the range, we'll create a new file and append everything 51 | // it it 52 | var extraBuilder *mergeTableBuilder 53 | // some crazy for loop has been written so try to refactor 54 | for _, idx := range p.tableIDS { 55 | t := newTable(d.absPath, idx) 56 | t.SeekBegin() 57 | builder := newTableMergeBuilder(int(t.size)) 58 | builder.append(t.fp, int64(t.fileInfo.metaOffset)) 59 | builder.mergeHashMap(t.offsetMap, 0) 60 | builders = append(builders, builder) 61 | } 62 | toCompacT := newTable(d.absPath, 
l0f.Idx) 63 | iter := toCompacT.iter() 64 | for iter.has() { 65 | kl, vl, key, val := iter.next() 66 | c := crc32.New(CastagnoliCrcTable) 67 | c.Write(key) 68 | hash := c.Sum32() 69 | for _, builder := range builders { 70 | if hash >= builder.Min() && hash <= builder.Max() { 71 | c := crc32.New(CastagnoliCrcTable) 72 | c.Write(key) 73 | hash := c.Sum32() 74 | builder.add(kl, vl, key, val, hash) 75 | continue 76 | } 77 | if extraBuilder == nil { 78 | extraBuilder = newTableMergeBuilder(10000000) 79 | } 80 | c := crc32.New(CastagnoliCrcTable) 81 | c.Write(key) 82 | hash := c.Sum32() 83 | extraBuilder.add(kl, vl, key, val, hash) 84 | } 85 | } 86 | for _, builder := range builders { 87 | d.saveL1Table(builder.finish()) 88 | } 89 | if extraBuilder != nil { 90 | d.saveL1Table(extraBuilder.finish()) 91 | } 92 | for _, idx := range p.tableIDS { 93 | d.l1handler.deleteTable(idx) 94 | removeTable(d.absPath, idx) 95 | d.manifest.deleteL1Table(idx) 96 | } 97 | d.l0handler.deleteTable(l0f.Idx) 98 | removeTable(d.absPath, l0f.Idx) 99 | d.manifest.deleteL0Table(l0f.Idx) 100 | } 101 | -------------------------------------------------------------------------------- /compaction_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "fmt" 17 | "testing" 18 | ) 19 | 20 | func testDB() *db { 21 | return &db{ 22 | manifest: &manifest{L0Files: make([]tableManifest, 0), L1Files: make([]tableManifest, 0), NextIDX: 5}, 23 | l0handler: newLevelHanlder(), 24 | l1handler: newLevelHanlder(), 25 | } 26 | } 27 | func TestL0(t *testing.T) { 28 | db := testDB() 29 | db.absPath = "./" 30 | t1 := testTable("vanakam", "nanbare", 1, 100, 1) 31 | t2 := testTable("vanakam", "nanbare", 1, 100, 2) 32 | t3 := testTable("vanakam", "nanbare", 1, 100, 3) 33 | t4 := testTable("vanakam", "nanbare", 1, 100, 4) 34 | db.l0handler.addTable(t1, 1) 35 | db.l0handler.addTable(t2, 2) 36 | db.l0handler.addTable(t3, 3) 37 | db.l0handler.addTable(t4, 4) 38 | db.manifest.addl0file(uint32(t1.fileInfo.entries), t1.fileInfo.minRange, t1.fileInfo.maxRange, int(t1.size), 1) 39 | db.manifest.addl0file(uint32(t2.fileInfo.entries), t2.fileInfo.minRange, t2.fileInfo.maxRange, int(t2.size), 2) 40 | db.manifest.addl0file(uint32(t3.fileInfo.entries), t3.fileInfo.minRange, t3.fileInfo.maxRange, int(t3.size), 3) 41 | db.manifest.addl0file(uint32(t4.fileInfo.entries), t4.fileInfo.minRange, t4.fileInfo.maxRange, int(t4.size), 4) 42 | db.L0Compaction() 43 | if len(db.manifest.L0Files) != 2 { 44 | t.Fatalf("expected 2 level 0 files but got %d", len(db.manifest.L0Files)) 45 | } 46 | if len(db.manifest.L1Files) != 1 { 47 | t.Fatalf("expected 1 level 1 files but got %d", len(db.manifest.L1Files)) 48 | } 49 | removeTestTable(3) 50 | removeTestTable(4) 51 | // 1 and 2 has merged as 6 52 | removeTestTable(6) 53 | } 54 | 55 | func TestUnion(t *testing.T) { 56 | db := testDB() 57 | db.absPath = "./" 58 | t1 := testTable("vanakam", "nanbare", 1, 100, 1) 59 | t2 := testTable("vanakam", "nanbare", 1, 100, 2) 60 | db.l0handler.addTable(t1, 1) 61 | db.l1handler.addTable(t2, 2) 62 | db.manifest.addl0file(uint32(t1.fileInfo.entries), t1.fileInfo.minRange, t1.fileInfo.maxRange, int(t1.size), 1) 63 | 
db.manifest.addl1file(uint32(t2.fileInfo.entries), t2.fileInfo.minRange, t2.fileInfo.maxRange, int(t2.size), 2) 64 | p := db.manifest.findL1Policy(db.manifest.L0Files[0]) 65 | if p.policy != UNION { 66 | t.Fatalf("expected UNION %d but got %d", UNION, p.policy) 67 | } 68 | db.handleUnion(p, db.manifest.L0Files[0]) 69 | if len(db.manifest.L0Files) != 0 { 70 | t.Fatalf("expected 0 level0 files but got %d", len(db.manifest.L0Files)) 71 | } 72 | if len(db.manifest.L1Files) != 1 { 73 | t.Fatalf("expected 1 level1 files but got %d", len(db.manifest.L1Files)) 74 | } 75 | removeTestTable(6) 76 | db = testDB() 77 | db.absPath = "./" 78 | t1 = testTable("vanakam", "nanbare", 40, 100, 1) 79 | t2 = testTable("vanakam", "nanbare", 1, 100, 2) 80 | db.l0handler.addTable(t1, 1) 81 | db.l1handler.addTable(t2, 2) 82 | db.manifest.addl0file(uint32(t1.fileInfo.entries), t1.fileInfo.minRange, t1.fileInfo.maxRange, int(t1.size), 1) 83 | db.manifest.addl1file(uint32(t2.fileInfo.entries), t2.fileInfo.minRange, t2.fileInfo.maxRange, int(t2.size), 2) 84 | p = db.manifest.findL1Policy(db.manifest.L0Files[0]) 85 | if p.policy != UNION { 86 | t.Fatalf("expected UNION %d but got %d", UNION, p.policy) 87 | } 88 | db.handleUnion(p, db.manifest.L0Files[0]) 89 | if len(db.manifest.L0Files) != 0 { 90 | t.Fatalf("expected 0 level0 files but got %d", len(db.manifest.L0Files)) 91 | } 92 | if len(db.manifest.L1Files) != 1 { 93 | t.Fatalf("expected 1 level1 files but got %d", len(db.manifest.L1Files)) 94 | } 95 | removeTestTable(6) 96 | } 97 | 98 | func TestNotUnion(t *testing.T) { 99 | db := testDB() 100 | db.absPath = "./" 101 | t1 := testTable("vanakam", "nanbare", 1, 100, 1) 102 | t2 := testTable("vanakam", "nanbare", 3000, 4000, 2) 103 | db.l0handler.addTable(t1, 1) 104 | db.l1handler.addTable(t2, 2) 105 | t2.fileInfo.minRange = t1.fileInfo.maxRange + 1 106 | t2.fileInfo.maxRange = t1.fileInfo.maxRange + t1.fileInfo.minRange 107 | db.manifest.addl0file(uint32(t1.fileInfo.entries), 
t1.fileInfo.minRange, t1.fileInfo.maxRange, int(t1.size), 1) 108 | db.manifest.addl1file(uint32(t2.fileInfo.entries), t2.fileInfo.minRange, t2.fileInfo.maxRange, int(t2.size), 2) 109 | p := db.manifest.findL1Policy(db.manifest.L0Files[0]) 110 | if p.policy != NOTUNION { 111 | t.Fatalf("expected NOTUNION %d but got %d", NOTUNION, p.policy) 112 | } 113 | db.handleNotUnion(p, db.manifest.L0Files[0]) 114 | if len(db.manifest.L0Files) != 0 { 115 | t.Fatalf("expected 0 level0 files but got %d", len(db.manifest.L0Files)) 116 | } 117 | if len(db.manifest.L1Files) != 2 { 118 | t.Fatalf("expected 2 level1 files but got %d", len(db.manifest.L1Files)) 119 | } 120 | removeTestTable(1) 121 | removeTestTable(2) 122 | } 123 | 124 | func TestOverlapping(t *testing.T) { 125 | db := testDB() 126 | db.absPath = "./" 127 | t1 := testTable("vanakam", "nanbare", 1, 100, 1) 128 | t2 := testTable("vanakam", "nanbare", 50, 10000, 2) 129 | 130 | t2.fileInfo.minRange = t1.fileInfo.minRange + 100 131 | t2.fileInfo.maxRange = t1.fileInfo.maxRange + t1.fileInfo.minRange 132 | db.l0handler.addTable(t1, 1) 133 | db.l1handler.addTable(t2, 2) 134 | db.manifest.addl0file(uint32(t1.fileInfo.entries), t1.fileInfo.minRange, t1.fileInfo.maxRange, int(t1.size), 1) 135 | db.manifest.addl1file(uint32(t2.fileInfo.entries), t2.fileInfo.minRange, t2.fileInfo.maxRange, int(t2.size), 2) 136 | fmt.Printf("%d %d %d %d", t1.fileInfo.minRange, t1.fileInfo.maxRange, t2.fileInfo.minRange, t2.fileInfo.maxRange) 137 | p := db.manifest.findL1Policy(db.manifest.L0Files[0]) 138 | if p.policy != OVERLAPPING { 139 | t.Fatalf("expected OVERLAPPING %d but got %d", NOTUNION, p.policy) 140 | } 141 | db.handleOverlapping(p, db.manifest.L0Files[0]) 142 | if len(db.manifest.L0Files) != 0 { 143 | t.Fatalf("expected 0 level0 files but got %d", len(db.manifest.L0Files)) 144 | } 145 | removeTestTable(6) 146 | } 147 | -------------------------------------------------------------------------------- /db.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "fmt" 17 | "hash/crc32" 18 | "os" 19 | "path/filepath" 20 | "sync" 21 | 22 | "github.com/sirupsen/logrus" 23 | "github.com/dgraph-io/badger/y" 24 | ) 25 | 26 | type db struct { 27 | opts Options 28 | writeChan chan *request 29 | l0handler *levelHandler 30 | l1handler *levelHandler 31 | absPath string 32 | manifest *manifest 33 | mtable *hashMap 34 | immtable *hashMap 35 | flushDisk chan *hashMap 36 | writeCloser *y.Closer 37 | loadBalancingCloser *y.Closer 38 | compactionCloser *y.Closer 39 | flushDiskCloser *y.Closer 40 | sync.RWMutex 41 | } 42 | 43 | type request struct { 44 | key []byte 45 | value []byte 46 | wg sync.WaitGroup 47 | } 48 | 49 | func New(opts Options) (*db, error) { 50 | absPath, err := filepath.Abs(opts.path) 51 | if err != nil { 52 | return nil, err 53 | } 54 | manifest, err := loadOrCreateManifest(absPath) 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | l0handler := newLevelHanlder() 60 | for _, l0file := range manifest.L0Files { 61 | t := newTable(absPath, l0file.Idx) 62 | l0handler.addTable(t, l0file.Idx) 63 | } 64 | l1handler := newLevelHanlder() 65 | for _, l1file := range manifest.L1Files { 66 | t := newTable(absPath, l1file.Idx) 67 | l1handler.addTable(t, l1file.Idx) 68 | } 69 | db := &db{ 70 | opts: opts, 71 | writeChan: make(chan *request, 1000), 72 | 
absPath: absPath, 73 | manifest: manifest, 74 | mtable: newHashMap(opts.memtablesize), 75 | l0handler: l0handler, 76 | l1handler: l1handler, 77 | writeCloser: y.NewCloser(1), 78 | loadBalancingCloser: y.NewCloser(1), 79 | compactionCloser: y.NewCloser(1), 80 | flushDiskCloser: y.NewCloser(1), 81 | flushDisk: make(chan *hashMap, 1), 82 | } 83 | go db.runCompaction(db.compactionCloser) 84 | go db.listenForFlushing(db.flushDiskCloser) 85 | go db.loadBalancing(db.loadBalancingCloser) 86 | go db.acceptWrite(db.writeCloser) 87 | return db, nil 88 | } 89 | 90 | func (d *db) Close() { 91 | 92 | d.loadBalancingCloser.SignalAndWait() 93 | d.compactionCloser.SignalAndWait() 94 | d.writeCloser.SignalAndWait() 95 | if d.mtable.Len() > 0 { 96 | d.flushDisk <- d.mtable 97 | } 98 | d.flushDiskCloser.SignalAndWait() 99 | err := d.manifest.save(d.absPath) 100 | if err != nil { 101 | logrus.Fatalf("manifest: unable to save the manifest %s", err.Error()) 102 | } 103 | } 104 | 105 | func (d *db) Set(key, val []byte) { 106 | r := request{ 107 | key: key, 108 | value: val, 109 | } 110 | r.wg.Add(1) 111 | d.writeChan <- &r 112 | r.wg.Wait() 113 | } 114 | func (d *db) acceptWrite(closer *y.Closer) { 115 | 116 | loop: 117 | for { 118 | select { 119 | case req := <-d.writeChan: 120 | 121 | // do write 122 | d.write(req) 123 | 124 | case <-closer.HasBeenClosed(): 125 | break loop 126 | } 127 | } 128 | close(d.writeChan) 129 | for req := range d.writeChan { 130 | d.write(req) 131 | } 132 | closer.Done() 133 | } 134 | 135 | func (d *db) write(req *request) { 136 | 137 | if !d.mtable.isEnoughSpace(len(req.key) + len(req.value)) { 138 | d.Lock() 139 | d.immtable = d.mtable 140 | d.mtable = newHashMap(d.opts.memtablesize) 141 | d.Unlock() 142 | d.flushDisk <- d.immtable 143 | } 144 | d.mtable.Set(req.key, req.value) 145 | req.wg.Done() 146 | 147 | } 148 | 149 | func (d *db) listenForFlushing(closer *y.Closer) { 150 | // original paper don't have this immutable table. 
btw I'm borrowing 151 | // it from wisckey's and badger implementation for async flushing to disk 152 | // instead of stalling at write. 153 | loop: 154 | for { 155 | select { 156 | case <-closer.HasBeenClosed(): 157 | break loop 158 | case imtable := <-d.flushDisk: 159 | d.flushMem(imtable) 160 | } 161 | } 162 | close(d.flushDisk) 163 | for imtable := range d.flushDisk { 164 | d.flushMem(imtable) 165 | } 166 | closer.Done() 167 | } 168 |

// flushMem writes the given in-memory table to disk under a fresh file ID,
// records it in the manifest as a level 0 file, registers the on-disk table
// with the level 0 handler and finally clears d.immtable.
func (d *db) flushMem(imtable *hashMap) {
	nxtID := d.manifest.nextFileID()
	imtable.toDisk(d.absPath, nxtID)
	d.manifest.addl0file(imtable.records, imtable.minRange, imtable.maxRange, imtable.occupiedSpace(), nxtID)
	table := newTable(d.absPath, nxtID)
	d.l0handler.addTable(table, nxtID)
	d.Lock()
	d.immtable = nil
	d.Unlock()
}

// mergeTable concatenates the data sections of t1 and t2 into one buffer,
// merges their offset maps (t2's offsets are shifted by the length of t1's
// data section) and saves the result as a new level 1 table.
func (d *db) mergeTable(t1, t2 *table) {
	t1.SeekBegin()
	t2.SeekBegin()
	builder := newTableMergeBuilder(int(t1.size + t2.size))
	builder.append(t1.fp, int64(t1.fileInfo.metaOffset))
	builder.append(t2.fp, int64(t2.fileInfo.metaOffset))
	builder.mergeHashMap(t1.offsetMap, 0)
	builder.mergeHashMap(t2.offsetMap, uint32(t1.fileInfo.metaOffset))
	buf := builder.finish()
	d.saveL1Table(buf)
}

// saveL1Table writes buf to a new table file, then registers the table with
// the level 1 handler and the manifest. Any I/O failure is fatal.
func (d *db) saveL1Table(buf []byte) {
	FID := d.manifest.nextFileID()
	fp, err := os.Create(giveTablePath(d.absPath, FID))
	if err != nil {
		logrus.Fatalf("compaction: unable to create new while pushing to level 1 %s", err.Error())
	}
	n, err := fp.Write(buf)
	if err != nil {
		logrus.Fatalf("compaction: unable to write to new level 1 table %s", err.Error())
	}
	if n != len(buf) {
		logrus.Fatalf("compaction: unable to write a new file at level 1 table expected %d but got %d", len(buf), n)
	}
	// The write handle is no longer needed: newTable is given only the path
	// and ID, not this handle. Leaving it open leaked one descriptor per
	// level 1 table.
	if err := fp.Close(); err != nil {
		logrus.Fatalf("compaction: unable to close new level 1 table %s", err.Error())
	}
	// l1 table has been created so have to remove those files from l0
	// and add it to l1
	newt := newTable(d.absPath, FID)
	d.l1handler.addTable(newt, FID)

	d.manifest.addl1file(uint32(newt.fileInfo.entries), newt.fileInfo.minRange, newt.fileInfo.maxRange, int(newt.size), FID)
	logrus.Infof("compaction: new l1 file has been added %d", FID)
}

// L0Compaction merges the first two level 0 tables (after sorting level 0 by
// density) into a single level 1 table and then removes both victims from
// the level 0 handler, the disk and the manifest. It does nothing when fewer
// than two level 0 files exist.
func (d *db) L0Compaction() {
	// sorting according to the density
	d.manifest.sortL0()
	// pick the two victim tables
	d.manifest.mutex.Lock()
	if len(d.manifest.L0Files) < 2 {
		d.manifest.mutex.Unlock()
		return
	}
	id1, id2 := d.manifest.L0Files[0].Idx, d.manifest.L0Files[1].Idx
	d.manifest.mutex.Unlock()

	t1, t2 := newTable(d.absPath, id1), newTable(d.absPath, id2)
	d.mergeTable(t1, t2)
	for _, victim := range []*table{t1, t2} {
		d.l0handler.deleteTable(victim.ID())
		victim.close()
		removeTable(d.absPath, victim.ID())
		d.manifest.deleteL0Table(victim.ID())
		logrus.Infof("compaction: l0 file has been deleted %d", victim.ID())
	}
}

// runCompaction is the background compaction loop. Whenever the number of
// level 0 files reaches opts.NoOfL0Files it pushes the level 0 files down to
// level 1 according to the policy reported by findL1Policy. It returns after
// the closer has been signalled.
//
// NOTE(review): the select has a default branch and no ticker, so this loop
// spins without pausing while there is nothing to compact.
func (d *db) runCompaction(closer *y.Closer) {
loop:
	for {
		select {
		case <-closer.HasBeenClosed():
			break loop
		default:
			// check for l0Tables
			l0Count := d.manifest.l0Len()
			if l0Count < d.opts.NoOfL0Files {
				continue
			}
			if d.manifest.l1Len() == 0 {
				d.L0Compaction()
			}
			// level one files already exist so find union set to push
			// if overlapping range then append accordingly other wise just push down
			l0fs := d.manifest.copyL0()
			fmt.Printf("%+v \n", d.manifest)
			for _, l0f := range l0fs {
				p := d.manifest.findL1Policy(l0f)
				switch p.policy {
				case NOTUNION:
					d.handleNotUnion(p, l0f)
				case UNION:
					d.handleUnion(p, l0f)
				case OVERLAPPING:
					d.handleOverlapping(p, l0f)
				}
			}
		}
	}
	closer.Done()
}

// loadBalancing is the background loop that splits every level 1 table whose
// manifest size exceeds opts.maxL1Size into two new level 1 tables: entries
// whose key hash is below the median entry go to the first table, the rest
// to the second. It returns after the closer has been signalled.
//
// NOTE(review): like runCompaction this loop has no pause between rounds, and
// the file of the table that was split is not removed from disk here.
func (d *db) loadBalancing(closer *y.Closer) {
loop:
	for {
		select {
		case <-closer.HasBeenClosed():
			break loop

		default:
			for _, l1f := range d.manifest.copyL1() {
				if l1f.Size <= uint32(d.opts.maxL1Size) {
					continue
				}
				logrus.Infof("load balancing: l1 file %d found which is larger than max l1 size", l1f.Idx)
				l1t := newTable(d.absPath, l1f.Idx)
				ents := l1t.entries()
				median := ents[len(ents)/2]
				builders := []*mergeTableBuilder{newTableMergeBuilder(int(l1f.Size) / 2), newTableMergeBuilder(int(l1f.Size) / 2)}
				iter := l1t.iter()
				for iter.has() {
					kl, vl, key, val := iter.next()
					hash := crc32.Checksum(key, CastagnoliCrcTable)
					if hash < median {
						builders[0].add(kl, vl, key, val, hash)
						continue
					}
					builders[1].add(kl, vl, key, val, hash)
				}
				d.saveL1Table(builders[0].finish())
				d.saveL1Table(builders[1].finish())
				d.l1handler.deleteTable(l1f.Idx)
				d.manifest.deleteL1Table(l1f.Idx)
				logrus.Infof("load balancing: l1 file %d is split into two l1 files properly", l1f.Idx)
			}
		}
	}
	closer.Done()
}

// Get looks the key up in the mutable table, then the immutable table that
// is waiting to be flushed (if any), then level 0 and finally level 1. The
// boolean result reports whether the key was found.
func (d *db) Get(key []byte) ([]byte, bool) {
	val, exist := d.mtable.Get(key)
	if exist {
		return val, exist
	}
	// Read the pointer once: flushMem sets d.immtable to nil concurrently, so
	// checking and then re-reading the field could dereference nil.
	if imm := d.immtable; imm != nil {
		val, exist := imm.Get(key)
		if exist {
			return val, exist
		}
	}

	val, exist = d.l0handler.get(key)
	if exist {
		return val, exist
	}
	return d.l1handler.get(key)
}
-------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "sync" 18 | "testing" 19 | 20 | "github.com/dgraph-io/badger/y" 21 | ) 22 | 23 | func TestDB(t *testing.T) { 24 | opts := DefaultOptions() 25 | //opts.path = "/tmp" 26 | d, err := New(opts) 27 | if err != nil { 28 | t.Fatalf("db is expected to open but got error %s", err.Error()) 29 | } 30 | d.Set([]byte("hello"), []byte("schoolboy")) 31 | d.Close() 32 | d, err = New(opts) 33 | if err != nil { 34 | t.Fatalf("db is expected to open but got error %s", err.Error()) 35 | } 36 | val, exist := d.Get([]byte("hello")) 37 | if !exist { 38 | t.Fatalf("unable to retrive data") 39 | } 40 | if bytes.Compare(val, []byte("schoolboy")) != 0 { 41 | t.Fatalf("value is not same expected schoolboy but got %s", string(val)) 42 | } 43 | d.Close() 44 | } 45 | 46 | func TestCloser(t *testing.T) { 47 | closer := y.NewCloser(1) 48 | go func() { 49 | loop: 50 | for { 51 | select { 52 | case <-closer.HasBeenClosed(): 53 | 54 | break loop 55 | } 56 | } 57 | closer.Done() 58 | }() 59 | closer.SignalAndWait() 60 | } 61 | 62 | func TestConcurrent(t *testing.T) { 63 | opts := DefaultOptions() 64 | d, err := New(opts) 65 | if err != nil { 66 | t.Fatalf("db is expected to open but got error %s", err.Error()) 67 | } 68 | var wg sync.WaitGroup 69 | wg.Add(1) 70 | wg.Add(1) 71 | go func() { 72 | for i := 0; i < 100; i++ { 73 | key := []byte("vanakam" + string(i)) 74 | value := []byte("nanbare" + string(i)) 75 | d.Set(key, value) 76 | 
} 77 | wg.Done() 78 | }() 79 | go func() { 80 | for i := 101; i < 200; i++ { 81 | key := []byte("vanakam" + string(i)) 82 | value := []byte("nanbare" + string(i)) 83 | d.Set(key, value) 84 | } 85 | wg.Done() 86 | }() 87 | wg.Wait() 88 | d.Close() 89 | wg.Add(1) 90 | d, err = New(opts) 91 | if err != nil { 92 | t.Fatalf("db is expected to open but got error %s", err.Error()) 93 | } 94 | go func() { 95 | for i := 108; i < 234; i++ { 96 | key := []byte("vanakam" + string(i)) 97 | value := []byte("nanbare" + string(i)) 98 | d.Set(key, value) 99 | } 100 | wg.Done() 101 | }() 102 | wg.Wait() 103 | d.Close() 104 | wg = sync.WaitGroup{} 105 | d, err = New(opts) 106 | wg.Add(1) 107 | wg.Add(1) 108 | wg.Add(1) 109 | go func() { 110 | for i := 0; i < 100; i++ { 111 | 112 | key := []byte("vanakam" + string(i)) 113 | value := []byte("nanbare" + string(i)) 114 | inv, exist := d.Get(key) 115 | if !exist { 116 | break 117 | //t.Fatalf("value not found for %s", string(key)) 118 | 119 | } 120 | if bytes.Compare(value, inv) != 0 { 121 | break 122 | //t.Fatalf("expected value %s but got %s", string(value), string(inv)) 123 | 124 | } 125 | } 126 | wg.Done() 127 | }() 128 | go func() { 129 | for i := 101; i < 200; i++ { 130 | key := []byte("vanakam" + string(i)) 131 | value := []byte("nanbare" + string(i)) 132 | inv, exist := d.Get(key) 133 | if !exist { 134 | break 135 | //t.Fatalf("value not found for %s", string(key)) 136 | } 137 | if bytes.Compare(value, inv) != 0 { 138 | break 139 | //t.Fatalf("expected value %s but got %s", string(value), string(inv)) 140 | } 141 | } 142 | wg.Done() 143 | }() 144 | go func() { 145 | for i := 101; i < 200; i++ { 146 | key := []byte("vanakam" + string(i)) 147 | value := []byte("nanbare" + string(i)) 148 | inv, exist := d.Get(key) 149 | if !exist { 150 | break 151 | //t.Fatalf("value not found for %s", string(key)) 152 | } 153 | if bytes.Compare(value, inv) != 0 { 154 | break 155 | //t.Fatalf("expected value %s but got %s", string(value), 
string(inv)) 156 | } 157 | } 158 | wg.Done() 159 | }() 160 | wg.Wait() 161 | d.Close() 162 | } 163 | 164 | func TestCompaction(t *testing.T) { 165 | opts := DefaultOptions() 166 | d, err := New(opts) 167 | if err != nil { 168 | t.Fatalf("db is expected to open but got error %s", err.Error()) 169 | } 170 | for i := 0; i < 100; i++ { 171 | key := []byte("vanakam" + string(i)) 172 | value := []byte("nanbare" + string(i)) 173 | d.Set(key, value) 174 | } 175 | d.Close() 176 | d, err = New(opts) 177 | if err != nil { 178 | t.Fatalf("db is expected to open but got error %s", err.Error()) 179 | } 180 | for i := 0; i < 100; i++ { 181 | key := []byte("vanakam" + string(i)) 182 | value := []byte("nanbare" + string(i)) 183 | d.Set(key, value) 184 | } 185 | d.Close() 186 | d, err = New(opts) 187 | if err != nil { 188 | t.Fatalf("db is expected to open but got error %s", err.Error()) 189 | } 190 | for i := 0; i < 100; i++ { 191 | key := []byte("vanakam" + string(i)) 192 | value := []byte("nanbare" + string(i)) 193 | d.Set(key, value) 194 | } 195 | d.Close() 196 | d, err = New(opts) 197 | if err != nil { 198 | t.Fatalf("db is expected to open but got error %s", err.Error()) 199 | } 200 | for i := 50; i < 200; i++ { 201 | key := []byte("vanakam" + string(i)) 202 | value := []byte("nanbare" + string(i)) 203 | d.Set(key, value) 204 | } 205 | d.Close() 206 | d, err = New(opts) 207 | if err != nil { 208 | t.Fatalf("db is expected to open but got error %s", err.Error()) 209 | } 210 | for i := 0; i < 100; i++ { 211 | key := []byte("vanakam" + string(i)) 212 | value := []byte("nanbare" + string(i)) 213 | d.Set(key, value) 214 | } 215 | d.Close() 216 | } 217 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | # levelHandler 2 | level handler is responsile fot holding all the tables in the level. 
We interact with the level handler for all table-related operations. 3 | 4 | The retrieval policy differs between levels: in level 0, keys overlap across tables, so we use a tree to index the possible tables. In level 1 we just pick one table from the tree and look for the value. The level handler is guarded by a mutex for concurrent gets and for insertion and removal of tables during compaction. 5 | 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/balajijinnah/aran 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/AndreasBriese/bbloom v0.0.0-20190825152654-46b345b51c96 7 | github.com/dgraph-io/badger v1.6.1 8 | github.com/sirupsen/logrus v1.6.0 9 | ) 10 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= 2 | github.com/AndreasBriese/bbloom v0.0.0-20190825152654-46b345b51c96 h1:cTp8I5+VIoKjsnZuH8vjyaysT/ses3EvZeaV/1UkF2M= 3 | github.com/AndreasBriese/bbloom v0.0.0-20190825152654-46b345b51c96/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= 4 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 5 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 6 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 7 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 8 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 9 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 10 | github.com/coreos/go-semver
v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 11 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 12 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 | github.com/dgraph-io/badger v1.6.1 h1:w9pSFNSdq/JPM1N12Fz/F/bzo993Is1W+Q7HjPzi7yg= 15 | github.com/dgraph-io/badger v1.6.1/go.mod h1:FRmFw3uxvcpa8zG3Rxs0th+hCLIuaQg8HlNV5bjgnuU= 16 | github.com/dgraph-io/ristretto v0.0.2/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E= 17 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 18 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 19 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 20 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 21 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 22 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 23 | github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 24 | github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 25 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 26 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 27 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 28 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 29 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 30 | github.com/pelletier/go-toml v1.2.0/go.mod 
h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 31 | github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= 32 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 33 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 34 | github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 35 | github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= 36 | github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= 37 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 38 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 39 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 40 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 41 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 42 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 43 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 44 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 45 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 46 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 47 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 48 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 49 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 50 | golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 51 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 52 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= 53 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 54 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 55 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 56 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 57 | golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k= 58 | golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 59 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 60 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 61 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 62 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 63 | -------------------------------------------------------------------------------- /helper.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "fmt" 17 | "os" 18 | 19 | "github.com/sirupsen/logrus" 20 | ) 21 | 22 | func giveTablePath(abs string, idx uint32) string { 23 | return fmt.Sprintf("%s/%d.table", abs, idx) 24 | } 25 | 26 | func minRange(a, b uint32) uint32 { 27 | if a > b { 28 | return b 29 | } else { 30 | return a 31 | } 32 | } 33 | func maxRange(a, b uint32) uint32 { 34 | if a > b { 35 | return a 36 | } else { 37 | return b 38 | } 39 | } 40 | 41 | func removeTable(abs string, idx uint32) { 42 | tp := giveTablePath(abs, idx) 43 | err := os.Remove(tp) 44 | if err != nil { 45 | logrus.Errorf("unable to delete the %d table", idx) 46 | } 47 | logrus.Infof("compaction: remove %d table", idx) 48 | } 49 | 50 | // https://codereview.stackexchange.com/questions/60074/in-array-in-go 51 | func in_array(val uint32, array []uint32) (index int, exists bool) { 52 | exists = false 53 | index = -1 54 | 55 | for i, v := range array { 56 | if val == v { 57 | index = i 58 | exists = true 59 | return 60 | } 61 | } 62 | 63 | return 64 | } 65 | -------------------------------------------------------------------------------- /inmemory.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "encoding/binary" 18 | "encoding/gob" 19 | "fmt" 20 | "os" 21 | "path/filepath" 22 | "sync" 23 | "sync/atomic" 24 | 25 | "hash/crc32" 26 | 27 | "github.com/AndreasBriese/bbloom" 28 | ) 29 | 30 | var CastagnoliCrcTable = crc32.MakeTable(crc32.Castagnoli) 31 | 32 | type hashMap struct { 33 | buf []byte 34 | currentOffset int 35 | minRange uint32 36 | maxRange uint32 37 | concurrentMap map[uint32]uint32 38 | size int 39 | records uint32 40 | sync.RWMutex 41 | } 42 | 43 | func newHashMap(size int) *hashMap { 44 | 45 | return &hashMap{ 46 | buf: make([]byte, size), 47 | currentOffset: 0, 48 | minRange: 0, 49 | maxRange: 0, 50 | concurrentMap: make(map[uint32]uint32, 0), 51 | size: size, 52 | RWMutex: sync.RWMutex{}, 53 | } 54 | } 55 | 56 | func (h *hashMap) Set(key, value []byte) { 57 | h.Lock() 58 | c := crc32.New(CastagnoliCrcTable) 59 | c.Write(key) 60 | hash := c.Sum32() 61 | oldOffSet := h.currentOffset 62 | kl := len(key) 63 | vl := len(value) 64 | // each 4 byte is for storing key and value length 65 | binary.BigEndian.PutUint32(h.buf[h.currentOffset:], uint32(kl)) 66 | h.currentOffset += 4 67 | binary.BigEndian.PutUint32(h.buf[h.currentOffset:], uint32(vl)) 68 | h.currentOffset += 4 69 | copy(h.buf[h.currentOffset:h.currentOffset+kl], key) 70 | h.currentOffset += kl 71 | copy(h.buf[h.currentOffset:h.currentOffset+vl], value) 72 | h.currentOffset += vl 73 | h.concurrentMap[hash] = uint32(oldOffSet) 74 | h.Unlock() 75 | h.setRange(hash) 76 | atomic.AddUint32(&h.records, 1) 77 | } 78 | 79 | func (h *hashMap) Get(outkey []byte) ([]byte, bool) { 
80 | h.RLock() 81 | defer h.RUnlock() 82 | c := crc32.New(CastagnoliCrcTable) 83 | c.Write(outkey) 84 | hash := c.Sum32() 85 | offset, ok := h.concurrentMap[hash] 86 | if !ok { 87 | return nil, ok 88 | } 89 | castedOffset := offset 90 | kl := binary.BigEndian.Uint32(h.buf[castedOffset : castedOffset+4]) 91 | castedOffset += 4 92 | vl := binary.BigEndian.Uint32(h.buf[castedOffset : castedOffset+4]) 93 | castedOffset += 4 94 | key := h.buf[castedOffset : castedOffset+kl] 95 | if bytes.Compare(key, outkey) != 0 { 96 | 97 | return nil, false 98 | } 99 | castedOffset += kl 100 | return h.buf[castedOffset : castedOffset+vl], true 101 | } 102 | 103 | func (h *hashMap) setRange(r uint32) { 104 | h.Lock() 105 | defer h.Unlock() 106 | h.setMinRage(r) 107 | h.setMaxRange(r) 108 | } 109 | func (h *hashMap) setMinRage(r uint32) { 110 | if h.minRange == 0 { 111 | h.minRange = r 112 | return 113 | } 114 | if h.minRange >= r { 115 | h.minRange = r 116 | } 117 | } 118 | 119 | func (h *hashMap) setMaxRange(r uint32) { 120 | if h.maxRange == 0 { 121 | h.maxRange = r 122 | return 123 | } 124 | if h.maxRange <= r { 125 | h.maxRange = r 126 | } 127 | } 128 | 129 | func (h *hashMap) isEnoughSpace(size int) bool { 130 | h.RLock() 131 | defer h.RUnlock() 132 | left := h.size - h.currentOffset 133 | if left < size { 134 | return false 135 | } 136 | return true 137 | } 138 | 139 | func (h *hashMap) occupiedSpace() int { 140 | return h.size - h.currentOffset 141 | } 142 | 143 | type fileInfo struct { 144 | metaOffset int 145 | entries int 146 | minRange uint32 147 | maxRange uint32 148 | filterSize int 149 | } 150 | 151 | //TODO: avoid unnecessary converstion 152 | func (fi *fileInfo) Decode(buf []byte) { 153 | _ = buf[31] 154 | fi.metaOffset = int(binary.BigEndian.Uint32(buf[0:4])) 155 | fi.entries = int(binary.BigEndian.Uint32(buf[4:8])) 156 | fi.minRange = binary.BigEndian.Uint32(buf[8:16]) 157 | fi.maxRange = binary.BigEndian.Uint32(buf[16:24]) 158 | fi.filterSize = 
int(binary.BigEndian.Uint32(buf[24:32])) 159 | } 160 | 161 | func (fi *fileInfo) Encode(buf []byte) { 162 | _ = buf[31] 163 | binary.BigEndian.PutUint32(buf[0:4], uint32(fi.metaOffset)) 164 | binary.BigEndian.PutUint32(buf[4:8], uint32(fi.entries)) 165 | binary.BigEndian.PutUint32(buf[8:16], fi.minRange) 166 | binary.BigEndian.PutUint32(buf[16:24], fi.maxRange) 167 | binary.BigEndian.PutUint32(buf[24:32], uint32(fi.filterSize)) 168 | 169 | } 170 | 171 | func (h *hashMap) toDisk(p string, idx uint32) { 172 | h.Lock() 173 | defer h.Unlock() 174 | filePath, err := filepath.Abs(p) 175 | if err != nil { 176 | panic("unable to form path for flushing the disk") 177 | } 178 | fp, err := os.Create(fmt.Sprintf("%s/%d.table", filePath, idx)) 179 | if err != nil { 180 | panic(fmt.Sprintf("unable to flush the in-memory table %v", err)) 181 | } 182 | fp.Write(h.buf[0:h.currentOffset]) 183 | slots := h.Len() 184 | filter := bbloom.New(float64(slots), 0.01) 185 | 186 | for key, _ := range h.concurrentMap { 187 | // kl := binary.BigEndian.Uint32(h.buf[valueOffset : valueOffset+4]) 188 | // valueOffset += 4 189 | // valueOffset += 4 190 | buf := make([]byte, 4) 191 | binary.BigEndian.PutUint32(buf, key) 192 | filter.Add(buf) //h.buf[valueOffset : valueOffset+kl]) 193 | } 194 | fib := make([]byte, 32) 195 | filterJSON := filter.JSONMarshal() 196 | fi := fileInfo{ 197 | metaOffset: h.currentOffset, 198 | entries: slots, 199 | minRange: h.minRange, 200 | maxRange: h.maxRange, 201 | filterSize: len(filterJSON), 202 | } 203 | fi.Encode(fib) 204 | metaBuf := new(bytes.Buffer) 205 | encoder := gob.NewEncoder(metaBuf) 206 | err = encoder.Encode(h.concurrentMap) 207 | if err != nil { 208 | panic("unable to create encoder") 209 | } 210 | fp.Write(metaBuf.Bytes()) 211 | fp.Write(filterJSON) 212 | fp.Write(fib) 213 | fp.Close() 214 | } 215 | 216 | func (h *hashMap) Len() int { 217 | return len(h.concurrentMap) 218 | } 219 | 
-------------------------------------------------------------------------------- /inmemory_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "fmt" 18 | "os" 19 | "path/filepath" 20 | "testing" 21 | ) 22 | 23 | func TestGetSet(t *testing.T) { 24 | hashMap := newHashMap(1024) 25 | key := []byte("vanakam") 26 | value := []byte("nanbare") 27 | hashMap.Set(key, value) 28 | inv, exist := hashMap.Get(key) 29 | if !exist { 30 | t.Fatal("key not found in the hashmap") 31 | } 32 | if bytes.Compare(value, inv) != 0 { 33 | t.Fatalf("expected value %s but got value %s", string(value), string(inv)) 34 | } 35 | 36 | } 37 | 38 | func TestGetSet100(t *testing.T) { 39 | hashMap := newHashMap(64 << 20) 40 | for i := 0; i < 100; i++ { 41 | key := []byte("vanakam" + string(i)) 42 | value := []byte("nanbare" + string(i)) 43 | hashMap.Set(key, value) 44 | inv, exist := hashMap.Get(key) 45 | if !exist { 46 | t.Fatal("key not found in the hashmap") 47 | } 48 | if bytes.Compare(value, inv) != 0 { 49 | t.Fatalf("expected value %s but got value %s", string(value), string(inv)) 50 | } 51 | } 52 | } 53 | 54 | func TestSaveToFile(t *testing.T) { 55 | hashMap := newHashMap(64 << 20) 56 | for i := 0; i < 100; i++ { 57 | key := []byte("vanakam" + string(i)) 58 | value := []byte("nanbare" + string(i)) 59 | 60 | hashMap.Set(key, value) 61 | inv, exist := 
hashMap.Get(key) 62 | if !exist { 63 | t.Fatal("key not found in the hashmap") 64 | } 65 | if bytes.Compare(value, inv) != 0 { 66 | t.Fatalf("expected value %s but got value %s", string(value), string(inv)) 67 | } 68 | } 69 | hashMap.toDisk("./", 1) 70 | filePath, err := filepath.Abs("./") 71 | if err != nil { 72 | panic("unable to form path for flushing the disk") 73 | } 74 | 75 | if _, err := os.Stat(fmt.Sprintf("%s/%d.table", filePath, 1)); os.IsNotExist(err) { 76 | panic("file not exist") 77 | } 78 | os.Remove(fmt.Sprintf("%s/%d.table", filePath, 1)) 79 | } 80 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "encoding/binary" 17 | "os" 18 | 19 | "github.com/sirupsen/logrus" 20 | ) 21 | 22 | // simple forward iterator 23 | type iterator struct { 24 | currentOffset int 25 | metaOffset int 26 | fp *os.File 27 | } 28 | 29 | func newIterator(fp *os.File, metaOffset int) *iterator { 30 | fp.Seek(0, 0) 31 | return &iterator{currentOffset: 0, metaOffset: metaOffset, fp: fp} 32 | } 33 | 34 | func (t *iterator) has() bool { 35 | has := t.currentOffset != t.metaOffset 36 | if has == false { 37 | t.fp.Close() 38 | } 39 | return has 40 | } 41 | 42 | // kl, vl, key, val 43 | func (t *iterator) next() ([]byte, []byte, []byte, []byte) { 44 | buf := make([]byte, 8) 45 | n, err := t.fp.Read(buf) 46 | if err != nil { 47 | logrus.Fatalf("iterator: failed during reading key and value length %s", err.Error()) 48 | } 49 | if n != 8 { 50 | logrus.Fatalf("iterator: failed to read key and value length expected 8 but got %d", n) 51 | } 52 | kl := binary.BigEndian.Uint32(buf[0:4]) 53 | vl := binary.BigEndian.Uint32(buf[4:8]) 54 | bufval := make([]byte, kl+vl) 55 | n, err = t.fp.Read(bufval) 56 | if err != nil { 57 | logrus.Fatalf("iterator: failed during reading key and value %s", err.Error()) 58 | } 59 | if n != int(kl+vl) { 60 | logrus.Fatalf("iterator: failed to read key and value expected %d but got %d", kl+vl, n) 61 | } 62 | t.currentOffset += 8 + int(kl) + int(vl) 63 | return buf[0:4], buf[4:8], bufval[0:kl], bufval[kl : kl+vl] 64 | } 65 | -------------------------------------------------------------------------------- /iterator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "testing" 17 | ) 18 | 19 | func TestIterator(t *testing.T) { 20 | tb := testTable("vanakam", "nanbare", 1, 100, 1) 21 | iter := tb.iter() 22 | records := 0 23 | for iter.has() { 24 | iter.next() 25 | records++ 26 | } 27 | removeTestTable(1) 28 | if records != 99 { 29 | t.Fatalf("expected 99 records but got %d", records) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /l1policy.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | type l1policyPush int 16 | 17 | const ( 18 | UNION l1policyPush = iota 19 | OVERLAPPING 20 | NOTUNION 21 | ) 22 | 23 | type compactionPolicy struct { 24 | policy l1policyPush 25 | tableIDS []uint32 26 | } 27 | 28 | func (m *manifest) findL1Policy(tm tableManifest) compactionPolicy { 29 | m.mutex.RLock() 30 | defer m.mutex.RUnlock() 31 | cp := compactionPolicy{ 32 | tableIDS: make([]uint32, 0), 33 | policy: NOTUNION, 34 | } 35 | for _, l1m := range m.L1Files { 36 | // we'll merge if both are union to vice versa 37 | if (l1m.MinRange <= tm.MinRange && l1m.MaxRange >= tm.MaxRange) || (l1m.MinRange >= tm.MinRange && l1m.MaxRange <= tm.MaxRange) { 38 | cp.policy = UNION 39 | cp.tableIDS = append(cp.tableIDS, l1m.Idx) 40 | return cp 41 | } 42 | if (l1m.MinRange <= tm.MinRange && l1m.MaxRange > tm.MinRange) || (l1m.MinRange < tm.MaxRange && l1m.MaxRange >= tm.MaxRange) { 43 | cp.policy = OVERLAPPING 44 | cp.tableIDS = append(cp.tableIDS, l1m.Idx) 45 | } 46 | } 47 | return cp 48 | } 49 | -------------------------------------------------------------------------------- /l1policy_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import "testing" 16 | 17 | func TestPolicy(t *testing.T) { 18 | m := &manifest{ 19 | L1Files: []tableManifest{ 20 | tableManifest{MaxRange: 100, MinRange: 100}, 21 | }, 22 | } 23 | p := m.findL1Policy(tableManifest{MaxRange: 100, MinRange: 100}) 24 | if p.policy != UNION { 25 | t.Fatalf("exptected UNION %d but got %d", UNION, p.policy) 26 | } 27 | p = m.findL1Policy(tableManifest{MaxRange: 400, MinRange: 300}) 28 | if p.policy != NOTUNION { 29 | t.Fatalf("exptected NOTUNION %d but got %d", NOTUNION, p.policy) 30 | } 31 | m.L1Files = append(m.L1Files, tableManifest{ 32 | MaxRange: 300, 33 | MinRange: 200, 34 | }) 35 | p = m.findL1Policy(tableManifest{MaxRange: 450, MinRange: 250}) 36 | if p.policy != OVERLAPPING { 37 | t.Fatalf("exptected OVERLAPPING %d but got %d", OVERLAPPING, p.policy) 38 | } 39 | p = m.findL1Policy(tableManifest{MaxRange: 250, MinRange: 150}) 40 | if p.policy != OVERLAPPING { 41 | t.Fatalf("exptected OVERLAPPING %d but got %d", OVERLAPPING, p.policy) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /level_handler.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "hash/crc32" 17 | "sync" 18 | ) 19 | 20 | type levelHandler struct { 21 | tables []*table 22 | indexer *tree 23 | sync.RWMutex 24 | } 25 | 26 | func newLevelHanlder() *levelHandler { 27 | return &levelHandler{ 28 | tables: make([]*table, 0), 29 | indexer: newTree(), 30 | } 31 | } 32 | 33 | func (l *levelHandler) addTable(t *table, idx uint32) { 34 | l.Lock() 35 | defer l.Unlock() 36 | l.tables = append(l.tables, t) 37 | l.indexer.insert(t.fileInfo.minRange, idx) 38 | } 39 | 40 | func (l *levelHandler) deleteTable(idx uint32) { 41 | l.Lock() 42 | defer l.Unlock() 43 | l.indexer.deleteTable(idx) 44 | for i, table := range l.tables { 45 | if table.ID() == idx { 46 | l.tables[i] = l.tables[len(l.tables)-1] 47 | l.tables[len(l.tables)-1] = nil 48 | l.tables = l.tables[:len(l.tables)-1] 49 | break 50 | } 51 | } 52 | } 53 | 54 | func (l *levelHandler) get(key []byte) ([]byte, bool) { 55 | l.RLock() 56 | defer l.RUnlock() 57 | c := crc32.New(CastagnoliCrcTable) 58 | c.Write(key) 59 | hash := c.Sum32() 60 | nodes := l.indexer.findAllLargestRange(hash) 61 | 62 | for _, node := range nodes { 63 | for _, id := range node.idx { 64 | t := l.getTable(id) 65 | if t != nil { 66 | val, exist := t.Get(key) 67 | if exist { 68 | return val, true 69 | } 70 | } 71 | } 72 | 73 | } 74 | return nil, false 75 | } 76 | 77 | func (l *levelHandler) getTable(idx uint32) *table { 78 | for _, t := range l.tables { 79 | if t.ID() == idx { 80 | return t 81 | } 82 | } 83 | return nil 84 | } 85 | -------------------------------------------------------------------------------- /manifest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "encoding/gob" 17 | "os" 18 | "path" 19 | "sort" 20 | "sync" 21 | "sync/atomic" 22 | ) 23 | 24 | type manifest struct { 25 | L1Files []tableManifest 26 | L0Files []tableManifest 27 | NextIDX uint32 28 | mutex sync.RWMutex 29 | } 30 | 31 | func loadOrCreateManifest(abspath string) (*manifest, error) { 32 | manifestPath := path.Join(abspath, "manifest.data") 33 | if _, err := os.Stat(manifestPath); os.IsNotExist(err) { 34 | fp, err := os.Create(manifestPath) 35 | if err != nil { 36 | return nil, err 37 | } 38 | fp.Close() 39 | return &manifest{ 40 | L1Files: make([]tableManifest, 0), 41 | L0Files: make([]tableManifest, 0), 42 | NextIDX: 0, 43 | }, nil 44 | } 45 | fp, err := os.Open(manifestPath) 46 | if err != nil { 47 | return nil, err 48 | } 49 | m := &manifest{} 50 | decoder := gob.NewDecoder(fp) 51 | err = decoder.Decode(m) 52 | if err != nil { 53 | return nil, err 54 | } 55 | return m, nil 56 | } 57 | func (m *manifest) nextFileID() uint32 { 58 | atomic.AddUint32(&m.NextIDX, 1) 59 | return m.NextIDX 60 | } 61 | func (m *manifest) save(absPath string) error { 62 | manifestPath := path.Join(absPath, "manifest.data") 63 | fp, err := os.OpenFile(manifestPath, os.O_WRONLY, 0666) 64 | if err != nil { 65 | return err 66 | } 67 | encoder := gob.NewEncoder(fp) 68 | return encoder.Encode(m) 69 | } 70 | 71 | type tableManifest struct { 72 | MaxRange uint32 73 | MinRange uint32 74 | Idx uint32 75 | Size uint32 76 | Records uint32 77 | Density float32 78 | } 79 | 80 | type tableDesencing []tableManifest 81 | 82 | func (t tableDesencing) Len() int { 83 | return 
len(t) 84 | } 85 | 86 | func (t tableDesencing) Swap(i, j int) { 87 | t[i], t[j] = t[j], t[i] 88 | } 89 | func (t tableDesencing) Less(i, j int) bool { 90 | return t[i].Density > t[i].Density 91 | } 92 | func (m *manifest) addl0file(records, minRange, maxRange uint32, size int, idx uint32) { 93 | m.mutex.Lock() 94 | defer m.mutex.Unlock() 95 | m.L0Files = append(m.L0Files, tableManifest{ 96 | Records: records, 97 | MinRange: minRange, 98 | MaxRange: maxRange, 99 | Size: uint32(size), 100 | Density: float32(records) / float32(maxRange-minRange), 101 | Idx: idx, 102 | }) 103 | } 104 | func (m *manifest) addl1file(records, minRange, maxRange uint32, size int, idx uint32) { 105 | m.mutex.Lock() 106 | defer m.mutex.Unlock() 107 | m.L1Files = append(m.L1Files, tableManifest{ 108 | Records: records, 109 | MinRange: minRange, 110 | MaxRange: maxRange, 111 | Size: uint32(size), 112 | Density: float32(records) / float32(maxRange-minRange), 113 | Idx: idx, 114 | }) 115 | } 116 | 117 | func (m *manifest) l0Len() int { 118 | m.mutex.RLock() 119 | defer m.mutex.RUnlock() 120 | return len(m.L0Files) 121 | } 122 | func (m *manifest) l1Len() int { 123 | m.mutex.RLock() 124 | defer m.mutex.RUnlock() 125 | return len(m.L1Files) 126 | } 127 | 128 | func (m *manifest) sortL0() { 129 | m.mutex.Lock() 130 | defer m.mutex.Unlock() 131 | sort.Sort(tableDesencing(m.L0Files)) 132 | } 133 | 134 | func (m *manifest) deleteL0Table(idx uint32) { 135 | m.mutex.Lock() 136 | defer m.mutex.Unlock() 137 | for i := 0; i < len(m.L0Files); i++ { 138 | if m.L0Files[i].Idx == idx { 139 | m.L0Files[i] = m.L0Files[len(m.L0Files)-1] 140 | m.L0Files = m.L0Files[:len(m.L0Files)-1] 141 | break 142 | } 143 | } 144 | } 145 | 146 | func (m *manifest) deleteL1Table(idx uint32) { 147 | m.mutex.Lock() 148 | defer m.mutex.Unlock() 149 | for i := 0; i < len(m.L1Files); i++ { 150 | if m.L1Files[i].Idx == idx { 151 | m.L1Files[i] = m.L1Files[len(m.L1Files)-1] 152 | m.L1Files = m.L1Files[:len(m.L1Files)-1] 153 | break 154 
| } 155 | } 156 | } 157 | 158 | func (m *manifest) copyL0() []tableManifest { 159 | m.mutex.Lock() 160 | defer m.mutex.Unlock() 161 | return m.L0Files 162 | } 163 | 164 | func (m *manifest) copyL1() []tableManifest { 165 | m.mutex.Lock() 166 | defer m.mutex.Unlock() 167 | return m.L1Files 168 | } 169 | -------------------------------------------------------------------------------- /merge_builder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | import ( 16 | "bufio" 17 | "bytes" 18 | "encoding/binary" 19 | "encoding/gob" 20 | "io" 21 | "os" 22 | 23 | "github.com/AndreasBriese/bbloom" 24 | "github.com/sirupsen/logrus" 25 | ) 26 | 27 | // It is used for merging table into buffer. 
usually meant to merge two table 28 | type mergeTableBuilder struct { 29 | buf *bytes.Buffer 30 | offsetMap map[uint32]uint32 31 | min uint32 32 | max uint32 33 | } 34 | 35 | func newTableMergeBuilder(size int) *mergeTableBuilder { 36 | buf := new(bytes.Buffer) 37 | buf.Grow(size) 38 | return &mergeTableBuilder{buf: buf, offsetMap: make(map[uint32]uint32), min: 0, max: 0} 39 | } 40 | 41 | func (m *mergeTableBuilder) Min() uint32 { 42 | return m.min 43 | } 44 | 45 | func (m *mergeTableBuilder) Max() uint32 { 46 | return m.max 47 | } 48 | 49 | // append data to the buffer 50 | func (m *mergeTableBuilder) append(fp *os.File, limit int64) { 51 | writer := bufio.NewWriter(m.buf) 52 | n, err := io.CopyN(writer, fp, limit) 53 | if err != nil { 54 | logrus.Fatalf("merge builder: unable to append data while mering %s", err.Error()) 55 | } 56 | if limit != n { 57 | logrus.Fatalf("merge builder: unable to append completely. expected %d but got %d", limit, n) 58 | } 59 | } 60 | 61 | func (m *mergeTableBuilder) add(kl, vl, key, val []byte, hash uint32) { 62 | offset := m.buf.Len() 63 | m.offsetMap[hash] = uint32(offset) 64 | m.setMax(hash) 65 | m.setMin(hash) 66 | n, err := m.buf.Write(kl) 67 | if err != nil { 68 | logrus.Fatalf("merge builder: unable to insert kl %s", err.Error()) 69 | } 70 | if len(kl) != n { 71 | logrus.Fatalf("merge builder: kl is not written completly expected %d but got %d", len(kl), n) 72 | } 73 | n, err = m.buf.Write(vl) 74 | if err != nil { 75 | logrus.Fatalf("merge builder: unable to insert vl %s", err.Error()) 76 | } 77 | if len(vl) != n { 78 | logrus.Fatalf("merge builder: vl is not written completly expected %d but got %d", len(kl), n) 79 | } 80 | n, err = m.buf.Write(key) 81 | if err != nil { 82 | logrus.Fatalf("merge builder: unable to insert key %s", err.Error()) 83 | } 84 | if len(key) != n { 85 | logrus.Fatalf("merge builder: key is not written completly expected %d but got %d", len(kl), n) 86 | } 87 | n, err = m.buf.Write(val) 88 | if err != 
nil { 89 | logrus.Fatalf("merge builder: unable to insert val %s", err.Error()) 90 | } 91 | if len(val) != n { 92 | logrus.Fatalf("merge builder: val is not written completly expected %d but got %d", len(kl), n) 93 | } 94 | } 95 | 96 | // merge hashmap and make filter for all the key, then write it to disk 97 | func (m *mergeTableBuilder) mergeHashMap(left map[uint32]uint32, offsetAdder uint32) { 98 | for key, value := range left { 99 | m.offsetMap[key] = value + offsetAdder 100 | m.setMin(key) 101 | m.setMax(key) 102 | } 103 | } 104 | 105 | func (m *mergeTableBuilder) setMin(min uint32) { 106 | if m.min == 0 { 107 | m.min = min 108 | return 109 | } 110 | if m.min > min { 111 | m.min = min 112 | } 113 | } 114 | 115 | func (m *mergeTableBuilder) setMax(max uint32) { 116 | if m.max == 0 { 117 | m.max = max 118 | return 119 | } 120 | if m.max < max { 121 | m.max = max 122 | } 123 | } 124 | 125 | func (m *mergeTableBuilder) appendFileInfo(fi *fileInfo) { 126 | fib := make([]byte, 32) 127 | fi.Encode(fib) 128 | n, err := m.buf.Write(fib) 129 | if err != nil { 130 | logrus.Fatalf("merge builder: unable to append file info %s", err.Error()) 131 | } 132 | if n != 32 { 133 | logrus.Fatalf("merge builder: unable to append file info completly expected %d got %d", 32, n) 134 | } 135 | } 136 | 137 | func (m *mergeTableBuilder) finish() []byte { 138 | el := len(m.offsetMap) 139 | filter := bbloom.New(float64(el), 0.01) 140 | buf := make([]byte, 4) 141 | for key := range m.offsetMap { 142 | binary.BigEndian.PutUint32(buf, key) 143 | filter.Add(buf) 144 | } 145 | mo := m.buf.Len() 146 | fJSON := filter.JSONMarshal() 147 | fl := len(fJSON) 148 | fi := &fileInfo{ 149 | metaOffset: mo, 150 | minRange: m.min, 151 | maxRange: m.max, 152 | entries: el, 153 | filterSize: fl, 154 | } 155 | e := gob.NewEncoder(m.buf) 156 | err := e.Encode(m.offsetMap) 157 | 158 | if err != nil { 159 | logrus.Fatalf("merge builder: unable to encode merged hashmap %s", err.Error()) 160 | } 161 | n, err := 
m.buf.Write(fJSON) 162 | if err != nil { 163 | logrus.Fatalf("merge builder: unable to write filter to the buffer %s", err.Error()) 164 | } 165 | if n != fl { 166 | logrus.Fatalf("merge builder: unable to write filter completley to the buffer expected %d got %d", fl, n) 167 | } 168 | m.appendFileInfo(fi) 169 | return m.buf.Bytes() 170 | } 171 | -------------------------------------------------------------------------------- /merge_builder_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "fmt" 18 | "os" 19 | "testing" 20 | ) 21 | 22 | func testTable(key, value string, begin, end int, idx uint32) *table { 23 | mem := newHashMap(64 << 20) 24 | for ; begin < end; begin++ { 25 | key := []byte(fmt.Sprintf("%s%d", key, begin)) 26 | value := []byte(fmt.Sprintf("%s%d", value, begin)) 27 | mem.Set(key, value) 28 | } 29 | mem.toDisk("./", idx) 30 | return newTable("./", idx) 31 | } 32 | 33 | func testValueExist(key, value string, tb *table, begin, end int, t *testing.T) { 34 | for ; begin < end; begin++ { 35 | key := []byte(fmt.Sprintf("%s%d", key, begin)) 36 | value := []byte(fmt.Sprintf("%s%d", value, begin)) 37 | inv, exist := tb.Get(key) 38 | if !exist { 39 | t.Fatalf("%s value not found", string(value)) 40 | } 41 | if bytes.Compare(value, inv) != 0 { 42 | t.Fatalf("expected value %s but got %s", string(value), string(inv)) 43 | } 44 | } 45 | } 46 | 47 | func removeTestTable(idx uint32) { 48 | os.Remove(fmt.Sprintf("./%d.table", idx)) 49 | } 50 | func TestBuilder(t *testing.T) { 51 | t1 := testTable("hello", "value", 1, 100, 1) 52 | t2 := testTable("hello", "schoolboy", 101, 200, 2) 53 | builder := newTableMergeBuilder(int(t1.size + t2.size)) 54 | t1.SeekBegin() 55 | t2.SeekBegin() 56 | builder.append(t1.fp, int64(t1.fileInfo.metaOffset)) 57 | builder.append(t2.fp, int64(t2.fileInfo.metaOffset)) 58 | builder.mergeHashMap(t1.offsetMap, 0) 59 | builder.mergeHashMap(t2.offsetMap, uint32(t1.fileInfo.metaOffset)) 60 | buf := builder.finish() 61 | fp, _ := os.Create("3.table") 62 | fp.Write(buf) 63 | t3 := newTable("./", 3) 64 | testValueExist("hello", "value", t3, 1, 100, t) 65 | testValueExist("hello", "schoolboy", t3, 101, 200, t) 66 | removeTestTable(1) 67 | removeTestTable(2) 68 | removeTestTable(3) 69 | } 70 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | // Copyright 
2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package aran 14 | 15 | type Options struct { 16 | NoOfL0Files int 17 | memtablesize int 18 | path string 19 | maxL1Size int 20 | } 21 | 22 | func DefaultOptions() Options { 23 | return Options{ 24 | 3, 25 | 64 << 20, // default value is robbed from badger. badger is a good inspiration to write key value storage in golang 26 | "./", 27 | 64 << 21, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /table.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "encoding/binary" 18 | "encoding/gob" 19 | "fmt" 20 | "hash/crc32" 21 | "os" 22 | "sync" 23 | "syscall" 24 | 25 | "github.com/AndreasBriese/bbloom" 26 | ) 27 | 28 | // read only 29 | type table struct { 30 | data []byte 31 | path string 32 | fileInfo *fileInfo 33 | size int64 34 | fp *os.File 35 | stat os.FileInfo 36 | filter *bbloom.Bloom 37 | offsetMap map[uint32]uint32 38 | sync.RWMutex 39 | idx uint32 40 | } 41 | 42 | func newTable(path string, idx uint32) *table { 43 | path = giveTablePath(path, idx) 44 | fp, err := os.OpenFile(path, os.O_RDONLY, 0666) 45 | if err != nil { 46 | panic(fmt.Sprintf("unable to open level files %v", err)) 47 | } 48 | stat, err := os.Stat(path) 49 | if err != nil { 50 | panic("unable to get the file state") 51 | } 52 | data, err := syscall.Mmap(int(fp.Fd()), int64(0), int(stat.Size()), syscall.PROT_READ, syscall.MAP_SHARED) 53 | if err != nil { 54 | panic("unable to mmap") 55 | } 56 | fi := &fileInfo{} 57 | // last 32 byte is file info 58 | //fiData := make([]byte, 32) 59 | //fp.ReadAt(fiData) 60 | fi.Decode(data[stat.Size()-32 : stat.Size()]) 61 | 62 | filter := bbloom.JSONUnmarshal(data[stat.Size()-32-int64(fi.filterSize) : stat.Size()-32]) 63 | metBuf := new(bytes.Buffer) 64 | metBuf.Write(data[fi.metaOffset : stat.Size()-32-int64(fi.filterSize)]) 65 | offsetMap := make(map[uint32]uint32, 0) 66 | decoder := gob.NewDecoder(metBuf) 67 | err = decoder.Decode(&offsetMap) 68 | if err != nil { 69 | panic("unable to decode the map") 70 | } 71 | return &table{ 72 | data: data, 73 | path: path, 74 | fileInfo: fi, 75 | size: stat.Size(), 76 | fp: fp, 77 | stat: stat, 78 | filter: &filter, 79 | offsetMap: offsetMap, 80 | idx: idx, 81 | } 82 | } 83 | 84 | func (t *table) SeekBegin() { 85 | t.fp.Seek(0, 0) 86 | } 87 | 88 | func (t *table) ID() uint32 { 89 | return t.idx 90 | } 91 | 92 | // only get is possible 93 | func (t *table) Get(key []byte) ([]byte, bool) { 94 | c := 
crc32.New(CastagnoliCrcTable) 95 | c.Write(key) 96 | hash := c.Sum32() 97 | if !t.filterHas(hash) { 98 | return nil, false 99 | } 100 | valueOffset, ok := t.offsetMap[hash] 101 | if !ok { 102 | return nil, false 103 | } 104 | kl := binary.BigEndian.Uint32(t.data[valueOffset : valueOffset+4]) 105 | valueOffset += 4 106 | vl := binary.BigEndian.Uint32(t.data[valueOffset : valueOffset+4]) 107 | valueOffset += 4 108 | valueOffset += kl 109 | return t.data[valueOffset : valueOffset+vl], true 110 | } 111 | 112 | func (t *table) filterHas(hash uint32) bool { 113 | buf := make([]byte, 4) 114 | binary.BigEndian.PutUint32(buf, hash) 115 | return t.filter.Has(buf) 116 | } 117 | func (t *table) iter() *iterator { 118 | return newIterator(t.fp, t.fileInfo.metaOffset) 119 | } 120 | func (t *table) close() { 121 | t.fp.Close() 122 | } 123 | 124 | func (t *table) entries() []uint32 { 125 | entries := make([]uint32, 0) 126 | for key := range t.offsetMap { 127 | entries = append(entries, key) 128 | } 129 | return entries 130 | } 131 | -------------------------------------------------------------------------------- /table_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "bytes" 17 | "fmt" 18 | "os" 19 | "testing" 20 | ) 21 | 22 | func TestTableGet(t *testing.T) { 23 | hashMap := newHashMap(64 << 20) 24 | for i := 0; i < 100; i++ { 25 | key := []byte(fmt.Sprintf("vanakam%d", i)) 26 | value := []byte(fmt.Sprintf("nanbare%d", i)) 27 | hashMap.Set(key, value) 28 | } 29 | hashMap.toDisk("./", 1) 30 | table := newTable("./", 1) 31 | inv, exist := table.Get([]byte(fmt.Sprintf("vanakam%d", 99))) 32 | if !exist { 33 | t.Fatal("key not found in the hashmap") 34 | } 35 | if bytes.Compare([]byte(fmt.Sprintf("nanbare%d", 99)), inv) != 0 { 36 | t.Fatalf("expected value %s but got value %s", "nanbare99", string(inv)) 37 | } 38 | os.Remove("./1.table") 39 | } 40 | -------------------------------------------------------------------------------- /tree.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | // simple binary serach tree to find the maximum lower range of incoming key 16 | 17 | type node struct { 18 | root *node 19 | left *node 20 | right *node 21 | lowerRange uint32 22 | idx []uint32 23 | } 24 | 25 | func (n *node) insert(lowerRange, idx uint32) { 26 | 27 | if n.lowerRange == lowerRange { 28 | n.idx = append(n.idx, idx) 29 | return 30 | } 31 | if n.lowerRange > lowerRange { 32 | if n.left == nil { 33 | n.left = &node{ 34 | left: nil, 35 | right: nil, 36 | root: n, 37 | idx: []uint32{idx}, 38 | lowerRange: lowerRange, 39 | } 40 | return 41 | } 42 | n.left.insert(lowerRange, idx) 43 | return 44 | } 45 | if n.right == nil { 46 | n.right = &node{ 47 | left: nil, 48 | right: nil, 49 | root: n, 50 | idx: []uint32{idx}, 51 | lowerRange: lowerRange, 52 | } 53 | return 54 | } 55 | n.right.insert(lowerRange, idx) 56 | } 57 | 58 | func (n *node) rootNode() *node { 59 | return n.root 60 | } 61 | func (n *node) findLargestLowerRange(r uint32) *node { 62 | if n.lowerRange < r { 63 | if n.right != nil { 64 | return n.right.findLargestLowerRange(r) 65 | } 66 | } 67 | if n.lowerRange > r { 68 | if n.left != nil { 69 | return n.left.findLargestLowerRange(r) 70 | } 71 | } 72 | if n.lowerRange > r { 73 | return nil 74 | } 75 | return n 76 | } 77 | 78 | type tree struct { 79 | root *node 80 | } 81 | 82 | func (n *node) deleteTable(idx uint32) { 83 | i, ok := in_array(idx, n.idx) 84 | if ok { 85 | n.idx[i] = n.idx[len(n.idx)-1] 86 | n.idx = n.idx[:len(n.idx)-1] 87 | if len(n.idx) != 0 { 88 | return 89 | } 90 | if n.right != nil { 91 | n = n.right 92 | return 93 | } 94 | n = n.left 95 | return 96 | } 97 | 98 | if n.right != nil { 99 | n.right.deleteTable(idx) 100 | } 101 | if n.left != nil { 102 | n.left.deleteTable(idx) 103 | } 104 | } 105 | 106 | func newTree() *tree { 107 | return &tree{} 108 | } 109 | 110 | func (t *tree) insert(lowerRange, idx uint32) { 111 | if t.root == nil { 112 | t.root = &node{ 113 | lowerRange: lowerRange, 114 | idx: 
[]uint32{idx}, 115 | left: nil, 116 | right: nil, 117 | root: t.root, 118 | } 119 | return 120 | } 121 | t.root.insert(lowerRange, idx) 122 | } 123 | 124 | func (t *tree) deleteTable(idx uint32) { 125 | i, ok := in_array(idx, t.root.idx) 126 | if ok { 127 | t.root.idx[i] = t.root.idx[len(t.root.idx)-1] 128 | t.root.idx = t.root.idx[:len(t.root.idx)-1] 129 | if len(t.root.idx) != 0 { 130 | return 131 | } 132 | if t.root.right != nil { 133 | t.root = t.root.right 134 | return 135 | } 136 | t.root = t.root.left 137 | return 138 | } 139 | if t.root.right != nil { 140 | t.root.right.deleteTable(idx) 141 | } 142 | if t.root.left != nil { 143 | t.root.left.deleteTable(idx) 144 | } 145 | } 146 | 147 | func (t *tree) findLargestLowerRange(r uint32) *node { 148 | if t.root == nil { 149 | return nil 150 | } 151 | if t.root.lowerRange < r { 152 | if t.root.right != nil { 153 | n := t.root.right.findLargestLowerRange(r) 154 | if n != nil { 155 | return n 156 | } 157 | } 158 | } 159 | if t.root.lowerRange > r { 160 | if t.root.left != nil { 161 | return t.root.left.findLargestLowerRange(r) 162 | } 163 | } 164 | if t.root.lowerRange > r { 165 | return nil 166 | } 167 | return t.root 168 | } 169 | 170 | func (t *tree) findAllLargestRange(r uint32) []*node { 171 | //TODO: it's a naive implementation. 172 | //It has to be changed to some stack based finding in the one iteration itself 173 | //instead of looping several time. 
174 | //anyway I don't think so that It'll bring much performance that's why I kept it simple(Big lie I'm too lazy to do it) 175 | //It is good have that stack based finding 176 | 177 | nodes := []*node{} 178 | for { 179 | n := t.findLargestLowerRange(r) 180 | if n == nil { 181 | break 182 | } 183 | nodes = append(nodes, n) 184 | r = n.lowerRange - 1 185 | } 186 | 187 | return nodes 188 | } 189 | -------------------------------------------------------------------------------- /tree_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 sch00lb0y. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | package aran 14 | 15 | import ( 16 | "testing" 17 | ) 18 | 19 | func TestLowerRange(t *testing.T) { 20 | tr := newTree() 21 | tr.insert(45, 1) 22 | tr.insert(20, 2) 23 | tr.insert(80, 3) 24 | tr.insert(70, 4) 25 | tr.insert(50, 5) 26 | n := tr.findLargestLowerRange(72) 27 | if n.lowerRange != 70 { 28 | t.Fatalf("expected 70 but got %d", n.lowerRange) 29 | } 30 | n = tr.findLargestLowerRange(20) 31 | if n.lowerRange != 20 { 32 | t.Fatalf("expected 20 but got %d", n.lowerRange) 33 | } 34 | n = tr.findLargestLowerRange(92) 35 | if n.lowerRange != 80 { 36 | t.Fatalf("expected 80 but got %d", n.lowerRange) 37 | } 38 | n = tr.findLargestLowerRange(69) 39 | if n.lowerRange != 50 { 40 | t.Fatalf("expected 50 but got %d", n.lowerRange) 41 | } 42 | n = tr.findLargestLowerRange(2) 43 | if n != nil { 44 | t.Fatalf("expected nil node but got %v", n) 45 | } 46 | 47 | ns := tr.findAllLargestRange(72) 48 | if len(ns) != 4 { 49 | t.Fatalf("expected 4 but got %d", len(ns)) 50 | } 51 | for i := range ns { 52 | if i == 0 { 53 | continue 54 | } 55 | if ns[i].lowerRange > ns[i-1].lowerRange { 56 | t.Fatalf("expected in decrement order") 57 | } 58 | } 59 | } 60 | 61 | func TestFindAllLargestRange(t *testing.T) { 62 | tr := newTree() 63 | tr.insert(45, 1) 64 | tr.insert(45, 2) 65 | tr.insert(45, 3) 66 | res := tr.findAllLargestRange(46) 67 | if len(res[0].idx) != 3 { 68 | t.Fatalf("expected 3 but got %d", len(res[0].idx)) 69 | } 70 | } 71 | 72 | func TestDeleteTable(t *testing.T) { 73 | tr := newTree() 74 | tr.insert(34, 1) 75 | tr.insert(32, 5) 76 | tr.insert(31, 4) 77 | tr.insert(34, 20) 78 | tr.insert(32, 24) 79 | tr.insert(31, 10) 80 | tr.deleteTable(1) 81 | tr.deleteTable(5) 82 | tr.deleteTable(4) 83 | tr.deleteTable(20) 84 | tr.deleteTable(24) 85 | tr.deleteTable(10) 86 | if tr.root != nil { 87 | t.Fatalf("expected root to be nil but got %+v", tr.root) 88 | } 89 | } 90 | --------------------------------------------------------------------------------