├── .gitignore ├── src ├── main │ └── java │ │ └── com │ │ └── lodborg │ │ └── btree │ │ ├── SegmentManager.java │ │ ├── CachingManager.java │ │ ├── LocalDiskManager.java │ │ ├── Segment.java │ │ └── PackedMemoryArray.java └── test │ └── java │ └── com │ └── lodborg │ └── btree │ ├── PackedMemoryArrayTest.java │ └── SegmentTest.java ├── README.md ├── LICENSE └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | tmp -------------------------------------------------------------------------------- /src/main/java/com/lodborg/btree/SegmentManager.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import java.io.Serializable; 4 | 5 | public interface SegmentManager { 6 | Segment fetch(long index); 7 | boolean persist(Segment segment); 8 | void remove(long index); 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/lodborg/btree/CachingManager.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import com.lodborg.cache.LRUCache; 4 | 5 | import java.io.Serializable; 6 | 7 | public class CachingManager implements SegmentManager { 8 | LRUCache> cache; 9 | SegmentManager realManager; 10 | 11 | public CachingManager(int capacity, SegmentManager realManager){ 12 | cache = new LRUCache<>(capacity); 13 | this.realManager = realManager; 14 | } 15 | 16 | @Override 17 | public Segment fetch(long index) { 18 | Segment segment = cache.get(index); 19 | if (segment != null) 20 | return segment; 21 | 22 | return realManager.fetch(index); 23 | } 24 | 25 | @Override 26 | public boolean persist(Segment segment) { 27 | cache.put(segment.id, segment); 28 | return realManager.persist(segment); 29 | } 30 | 31 | @Override 32 | public void remove(long index) { 33 | realManager.remove(index); 34 | } 35 | } 36 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cache-Oblivious B-Trees 2 | 3 | __*Currently under construction*__ 4 | 5 | Eventually, this repository will contain an implementation of a Cache-Oblivious B-Tree that performs efficiently without prior knowledge of the memory hierarchy. Essentially, the main idea is to build a van Emde Boas layout on top of a Packed Memory Array. The result is a binary search algorithm that takes advantage of cache locality and minimizes the number of external memory reads. 6 | 7 | Sit back, enjoy a cup of coffee and maybe have a look at some links on the topic: 8 | * The papers that sparked my interest in the topic: [here](http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/paper.pdf) and [here](http://erikdemaine.org/papers/FOCS2000b/paper.pdf) 9 | * A paper on [Adaptive Packed Memory Arrays](https://www3.cs.stonybrook.edu/~bender/newpub/BenderHu07-TODS.pdf) (or packed memory arrays on steroids) 10 | * Or an [MIT lecture](https://www.youtube.com/watch?v=V3omVLzI0WE) on the topic given by the one and only Erik Demaine. Highly recommended! This guy is a legend. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 lodborg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 4.0.0 6 | com.lodborg.btree 7 | btree 8 | jar 9 | 0.0.1 10 | Cache-Oblivious B-Tree 11 | 12 | 13 | junit 14 | junit 15 | 4.12 16 | test 17 | 18 | 19 | com.github.lodborg 20 | lru-cache 21 | 8d26192 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.apache.maven.plugins 29 | maven-compiler-plugin 30 | 3.3 31 | 32 | 1.8 33 | 1.8 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | jitpack.io 42 | https://jitpack.io 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/main/java/com/lodborg/btree/LocalDiskManager.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import java.io.*; 4 | 5 | public class LocalDiskManager implements SegmentManager { 6 | private File dir; 7 | 8 | public LocalDiskManager(String path){ 9 | this.dir = new File(path); 10 | if (!dir.exists() || !dir.isDirectory()) 11 | throw new IllegalArgumentException("The specified path does not exist or is not a directory."); 12 | } 13 | 14 | @Override 15 | public Segment fetch(long index) { 16 | File file = getSegmentFileFromIndex(index); 17 | if (!file.exists()) 18 | return null; 19 | ObjectInputStream stream = null; 20 | try { 21 | stream = new ObjectInputStream(new FileInputStream(file)); 22 | return (Segment) stream.readObject(); 23 | } catch (IOException | ClassNotFoundException e) { 24 | return null; 25 | } finally { 26 | if (stream != null) { 27 | try { 28 | stream.close(); 29 | } catch (IOException e) { 30 | e.printStackTrace(); 31 | } 32 | } 33 | } 34 | } 35 | 36 | @Override 37 | public boolean persist(Segment segment) { 38 | File file = getSegmentFileFromIndex(segment.id); 39 | try { 40 | ObjectOutputStream stream = new ObjectOutputStream(new FileOutputStream(file)); 41 | stream.writeObject(segment); 42 | } catch 
(IOException e) { 43 | return false; 44 | } 45 | return true; 46 | } 47 | 48 | @Override 49 | public void remove(long index) { 50 | File file = getSegmentFileFromIndex(index); 51 | if (file.exists()) 52 | file.delete(); 53 | } 54 | 55 | private File getSegmentFileFromIndex(long index){ 56 | return new File(dir.getAbsolutePath() + File.separator + "segment_"+index+".srl"); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/test/java/com/lodborg/btree/PackedMemoryArrayTest.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import org.junit.Test; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertArrayEquals; 6 | 7 | public class PackedMemoryArrayTest { 8 | @Test 9 | public void test_shrinkAndExpand(){ 10 | PackedMemoryArray arr = new PackedMemoryArray<>("tmp", 8); 11 | arr.add(1, 10); 12 | arr.add(2, 30); 13 | arr.add(2, 20); 14 | arr.add(7, 60); 15 | arr.add(6, 40); 16 | arr.add(7, 50); 17 | arr.add(5, 35); 18 | assertArrayEquals(new String[]{ "null,10,20,30,35,40,50,60" }, arr.print()); 19 | arr.add(1, 0); 20 | assertArrayEquals(new String[]{ 21 | "null,0,null,10,null,20,null,30", 22 | "null,35,null,40,null,50,null,60" 23 | }, arr.print()); 24 | arr.set(9, null); 25 | assertArrayEquals(new String[]{ 26 | "null,0,null,10,null,20,null,30", 27 | "null,null,null,40,null,50,null,60" 28 | }, arr.print()); 29 | arr.set(13, null); 30 | assertArrayEquals(new String[]{ "null,null,0,10,20,30,40,60" }, arr.print()); 31 | } 32 | 33 | @Test 34 | public void test_dontShrinkIfRemovingFromDifferentSegments(){ 35 | PackedMemoryArray arr = new PackedMemoryArray<>("tmp", 8); 36 | arr.add(1, 10); 37 | arr.add(2, 30); 38 | arr.add(2, 20); 39 | arr.add(7, 60); 40 | arr.add(6, 40); 41 | arr.add(7, 50); 42 | arr.add(5, 35); 43 | arr.add(1, 0); 44 | arr.set(9, null); 45 | assertArrayEquals(new String[]{ 46 | 
"null,0,null,10,null,20,null,30", 47 | "null,null,null,40,null,50,null,60" 48 | }, arr.print()); 49 | arr.set(3, null); 50 | assertArrayEquals(new String[]{ 51 | "null,0,null,null,null,20,null,30", 52 | "null,null,null,40,null,50,null,60" 53 | }, arr.print()); 54 | arr.add(11, 35); 55 | assertArrayEquals(new String[]{ 56 | "null,0,null,null,null,20,null,30", 57 | "null,null,null,35,40,50,null,60" 58 | }, arr.print()); 59 | } 60 | 61 | private void print(PackedMemoryArray arr){ 62 | for (String str: arr.print()) 63 | System.out.println(str); 64 | System.out.println(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/com/lodborg/btree/SegmentTest.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import org.junit.Ignore; 4 | import org.junit.Test; 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class SegmentTest { 8 | @Test 9 | public void test_shiftRightAfterInsert(){ 10 | Segment arr = new Segment<>(0, 4); 11 | arr.add(1, 10); 12 | assertEquals("null,10,null,null", arr.print()); 13 | assertEquals(1, arr.getCount()); 14 | arr.add(1, 5); 15 | assertEquals("null,5,10,null", arr.print()); 16 | assertEquals(2, arr.getCount()); 17 | arr.add(1, 0); 18 | assertEquals("null,0,5,10", arr.print()); 19 | assertEquals(3, arr.getCount()); 20 | } 21 | 22 | @Test 23 | public void test_shiftLeftAfterInsert(){ 24 | Segment arr = new Segment<>(0, 4); 25 | arr.add(2, 10); 26 | assertEquals("null,null,10,null", arr.print()); 27 | assertEquals(1, arr.getCount()); 28 | arr.add(3, 20); 29 | assertEquals("null,null,10,20", arr.print()); 30 | assertEquals(2, arr.getCount()); 31 | arr.add(3, 15); 32 | assertEquals("null,10,15,20", arr.print()); 33 | assertEquals(3, arr.getCount()); 34 | } 35 | 36 | @Test 37 | public void test_insertImmediatelyToLeftInstead(){ 38 | Segment arr = new Segment<>(0, 4); 39 | arr.add(2, 10); 40 | 
assertEquals("null,null,10,null", arr.print()); 41 | assertEquals(1, arr.getCount()); 42 | arr.add(3, 20); 43 | assertEquals("null,null,10,20", arr.print()); 44 | assertEquals(2, arr.getCount()); 45 | arr.add(2, 5); 46 | assertEquals("null,5,10,20", arr.print()); 47 | assertEquals(3, arr.getCount()); 48 | } 49 | 50 | @Test 51 | public void test_removeElement(){ 52 | Segment arr = new Segment<>(0, 4); 53 | arr.add(2, 10); 54 | assertEquals("null,null,10,null", arr.print()); 55 | assertEquals(1, arr.getCount()); 56 | arr.add(3, 20); 57 | assertEquals("null,null,10,20", arr.print()); 58 | assertEquals(2, arr.getCount()); 59 | arr.set(2, null); 60 | assertEquals("null,null,null,20", arr.print()); 61 | assertEquals(1, arr.getCount()); 62 | } 63 | 64 | @Test 65 | public void test_removeLastElement(){ 66 | Segment arr = new Segment<>(0, 4); 67 | arr.add(2, 10); 68 | assertEquals("null,null,10,null", arr.print()); 69 | assertEquals(1, arr.getCount()); 70 | arr.set(2, null); 71 | assertEquals("null,null,null,null", arr.print()); 72 | assertEquals(0, arr.getCount()); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/lodborg/btree/Segment.java: -------------------------------------------------------------------------------- 1 | package com.lodborg.btree; 2 | 3 | import java.io.Serializable; 4 | 5 | public class Segment { 6 | public final long id; 7 | private Object[] arr; 8 | private Integer count; 9 | 10 | public Segment(int id, int size){ 11 | this.id = id; 12 | arr = new Object[size]; 13 | } 14 | 15 | public T add(int index, T value){ 16 | if (value == null) 17 | throw new IllegalArgumentException("add()-ing a null value. 
Did you mean to use set() instead?"); 18 | T old = (T)arr[index]; 19 | if (old == null) { 20 | arr[index] = value; 21 | if (count != null) 22 | count++; 23 | return null; 24 | } 25 | 26 | int freeIndex = getNextFreeIndexRight(index); 27 | if (freeIndex != -1){ 28 | for (int i=freeIndex; i>index; i--){ 29 | arr[i] = arr[i-1]; 30 | } 31 | arr[index] = value; 32 | } else { 33 | freeIndex = getNextFreeIndexLeft(index); 34 | if (freeIndex == -1) 35 | throw new IllegalArgumentException("Segment is full"); 36 | for (int i=freeIndex; i= 0 && arr[start] != null) { 70 | start--; 71 | } 72 | if (start < 0) { 73 | return -1; 74 | } 75 | return start; 76 | } 77 | 78 | public T get(int index){ 79 | return (T)arr[index]; 80 | } 81 | 82 | public int getCount(){ 83 | if (count == null) { 84 | count = 0; 85 | for (Object elem : arr) { 86 | if (elem != null) 87 | count++; 88 | } 89 | } 90 | return count; 91 | } 92 | 93 | protected String print(){ 94 | StringBuilder res = new StringBuilder(); 95 | res.append(arr[0]); 96 | for (int i=1; i { 6 | int segmentSize; 7 | int segmentCount; 8 | int height; 9 | SegmentManager manager; 10 | 11 | public PackedMemoryArray(String dir, int segmentSize){ 12 | this.segmentSize = segmentSize; 13 | segmentCount = 1; 14 | height = 1; 15 | manager = new CachingManager<>(3, new LocalDiskManager<>(dir)); 16 | manager.persist(new Segment<>(0, segmentSize)); 17 | } 18 | 19 | public T get(long index){ 20 | long segment = index / segmentSize; 21 | int offset = (int)(index % segmentSize); 22 | return manager.fetch(segment).get(offset); 23 | } 24 | 25 | public void add(long index, T value){ 26 | long id = index / segmentSize; 27 | int offset = (int)(index % segmentSize); 28 | Segment segment = manager.fetch(id); 29 | segment.add(offset, value); 30 | if (mustRebalance(segment)){ 31 | rebalance(segment); 32 | } else { 33 | manager.persist(segment); 34 | } 35 | } 36 | 37 | public void set(long index, T value){ 38 | _set(index, value); 39 | Segment segment = 
manager.fetch(index / segmentSize); 40 | if (mustRebalance(segment)) 41 | rebalance(segment); 42 | /*long id = index / segmentSize; 43 | int offset = (int)(index % segmentSize); 44 | Segment segment = manager.fetch(id); 45 | T old = segment.set(offset, value); 46 | if (old != null){ 47 | manager.persist(segment); 48 | return; 49 | } 50 | 51 | if (mustRebalance(segment)) 52 | rebalance(segment); 53 | else 54 | manager.persist(segment);*/ 55 | } 56 | 57 | private void _set(long index, T value){ 58 | long id = index / segmentSize; 59 | int offset = (int)(index % segmentSize); 60 | manager.fetch(id).set(offset, value); 61 | } 62 | 63 | private boolean mustRebalance(Segment segment) { 64 | double density = (double)segment.getCount()/segmentSize; 65 | double[] target = getTargetDensity(height-1); 66 | return density <= target[0] && height > 1 || density >= target[1]; 67 | } 68 | 69 | private void rebalance(Segment segment) { 70 | int depth = height-2; 71 | int divisor = 2; 72 | long rangeFrom = segment.id; 73 | long rangeTo = segment.id; 74 | long count = segment.getCount(); 75 | double density = (double)count/segmentSize; 76 | double[] target = getTargetDensity(height-1); 77 | 78 | while (depth >= 0) { 79 | long start = rangeFrom / divisor * divisor; 80 | if (start == rangeFrom) { 81 | for (long i = rangeTo + 1; i < rangeTo + divisor; i++) 82 | count += manager.fetch(i).getCount(); 83 | } else { 84 | for (long i = start; i < rangeFrom; i++) 85 | count += manager.fetch(i).getCount(); 86 | } 87 | rangeFrom = start; 88 | rangeTo = start + divisor - 1; 89 | density = (double) count / (divisor * segmentSize); 90 | target = getTargetDensity(depth); 91 | if (density > target[0] && density < target[1]) 92 | break; 93 | depth--; 94 | divisor *= 2; 95 | } 96 | 97 | if (depth < 0){ 98 | if (density >= target[1]) { 99 | for (int i=segmentCount; i(i, segmentSize)); 101 | expand(); 102 | } else { 103 | shrink(); 104 | long middle = rangeFrom + (rangeTo-rangeFrom); 105 | for (long 
i=rangeTo; i>middle; i--) 106 | manager.remove(i); 107 | } 108 | } else { 109 | shuffle(rangeFrom*segmentSize, rangeTo*segmentSize); 110 | } 111 | } 112 | 113 | private void expand(){ 114 | long count = squash(0, segmentCount*segmentSize); 115 | height++; 116 | segmentCount *= 2; 117 | shuffle(0, segmentCount*segmentSize, count); 118 | } 119 | 120 | private void shrink(){ 121 | long count = squash(0, segmentCount*segmentSize); 122 | height--; 123 | segmentCount /= 2; 124 | shuffle(0, segmentCount*segmentSize, count); 125 | } 126 | 127 | /** 128 | * Reshuffles all non-empty elements in that given range, so that they have a 129 | * constant gap between them. The last element in the range is always set. 130 | * @param from Inclusive 131 | * @param to Exclusive 132 | */ 133 | private void shuffle(long from, long to){ 134 | shuffle(from, to, squash(from, to)); 135 | } 136 | 137 | /** 138 | * Shuffles all non-empty elements in an already squashed range, so that 139 | * they have a constant gap between them. The last element in the range is 140 | * always set. 141 | * @param from Inclusive 142 | * @param to Exclusive 143 | * @param count The amount of non-empty elements in the range, squashed 144 | * at the beginning of the range. 145 | */ 146 | private void shuffle(long from, long to, long count){ 147 | long j = count + from - 1; 148 | long i = to-1; 149 | long remaining = i - j; 150 | long gap = remaining / (j-from+1); 151 | for (; j>=from; j--){ 152 | _set(i, get(j)); 153 | _set(j, null); 154 | i -= gap+1; 155 | if ((i-from+1)% (gap+1) != 0) 156 | i--; 157 | } 158 | } 159 | 160 | /** 161 | * Moves all non-empty elements in the given range to the beginning of the range. 162 | * @param from Inclusive 163 | * @param to Exclusive 164 | * @return The amount of non-empty elements in the range. 165 | */ 166 | private long squash(long from, long to){ 167 | long i = from; 168 | long j = i; 169 | for ( ; i